Logo Search packages:      
Sourcecode: t-coffee version File versions  Download package

tree_util.c

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include "io_lib_header.h"
#include "util_lib_header.h"
#include "dp_lib_header.h"
#include "define_header.h"

#define TOPOLOGY 1
#define WEIGHTED 2
#define LENGTH   3
#define RECODE   4

int distance_tree;
int rooted_tree;
int tot_nseq;
static NT_node compute_fj_tree (NT_node T, Alignment *A, int limit, char *mode);
static NT_node compute_cw_tree (Alignment *A);
static NT_node compute_std_tree (Alignment *A, int n, char **arg);
static NT_node tree2fj_tree (NT_node T);


static int compare_node1 ( int *b1, int *b2, int n);
static int compare_node2 ( int *b1, int *b2, int n);
static int find_seq_chain (Alignment *A, int **sim,int *used,int seq0,int seq1, int seq2,int chain_length, int limit, int max_chain, int *nseq);
int new_display_tree (NT_node T, int n);
NT_node display_code (NT_node T, int nseq, FILE *fp);
NT_node display_dist (NT_node T, int n, FILE *fp);
/*********************************************************************/
/*                                                                   */
/*                                   dpa_tree_manipulation           */
/*                                                                   */
/*********************************************************************/
static NT_node code_dpa_tree ( NT_node T, int **D);
NT_node collapse_sub_tree ( NT_node T,int nseq, int *list, char *new_name);
NT_node seq2dpa_tree  (Sequence *S, char *mode)
{
  Constraint_list *CL;
  NT_node **T;
  NT_node Tree;
  CL=declare_constraint_list_simple (S);
  CL->local_stderr=NULL;
  
  
  CL->DM=cl2distance_matrix (CL,NOALN,(mode==NULL)?"ktup":mode, NULL, 0);
  
  T=int_dist2nj_tree ( (CL->DM)->similarity_matrix, S->name, S->nseq, vtmpnam (NULL));
  Tree=T[3][0];
  
  Tree=recode_tree (Tree, S);
  Tree=reset_dist_tree (Tree, -1);
  
  Tree=code_dpa_tree (Tree, (CL->DM)->similarity_matrix);
  free_distance_matrix (CL->DM);
  return Tree;
}
  
NT_node tree2dpa_tree (NT_node T, Alignment *A, char *mode)
{
  /*This Function sets the branches with Length values used by DP*/
  /*The tree must be rooted*/
  Sequence *S;
  int **D;

  S=aln2seq (A);
  T=recode_tree     (T, S);
  T=reset_dist_tree (T, -1);
  D=get_sim_aln_array (A,mode);
    
  T=code_dpa_tree (T, D);
  return T;
}

NT_node code_dpa_tree ( NT_node T, int **D)
{
  if ( !T) return T;
  else if ( T->leaf==1)
    {
      T->dist=100;
      return T;
    }
  else
    {
      int nl, *ll;
      int nr, *lr;
      int a, b, min=100;
      float tot, n=0;
      
      nl=(T->left)->nseq;ll=(T->left)->lseq;
      nr=(T->right)->nseq;lr=(T->right)->lseq;

      for (tot=0,n=0, a=0; a< nl; a++)
      for ( b=0; b< nr; b++, n++)
        {
          tot+=D[ll[a]][lr[b]];
          min=MIN(min,(D[ll[a]][lr[b]]));
        }
      /*      T->dist=(mode==AVERAGE)?(tot/n):min:;*/
      T->dist=(n>0)?tot/n:0;
      T->dist=min;
      code_dpa_tree ( T->right, D);
      code_dpa_tree ( T->left, D);
      return T;
    }
}
static int group_number;
char *tree2Ngroup (Alignment *A, NT_node T, int max_n, char *fname, char *mat)
{
  double top, bot, mid, pmid;
  Sequence *S;
  int n;
  
  

  if (!T)
    {
      char **list;

      list=declare_char ( 2, 100);
      sprintf (list[0], "%s",mat); 
      
      fprintf ( stderr, "\nCompute Phylogenetic tree [Matrix=%s]", mat);
      T=compute_std_tree(A,1, list);
      fprintf ( stderr, "\nCompute dpa tree");
      T=tree2dpa_tree (T,A, mat);
    }
  
  S=tree2seq(T, NULL);
  
  if ( max_n<0)
    {
      max_n*=-1;
      n=tree2group_file (T,S,0, max_n, fname);
      fprintf ( stderr, "\n#TrimTC: Split in %d Groups at a minimum of %d%% ID\n",n, (int)max_n);
      return fname;
      
    }
  else if ( max_n>0)
    {
      if ( max_n>S->nseq)max_n=S->nseq;
            
      top=100; bot=0;
      pmid=0; mid=50;
      n=tree2group_file(T, S,0, (int)mid,fname);
      mid=dichotomy((double)n, (double)max_n,(pmid=mid), &bot, &top);
      while (n!=max_n && (int)pmid!=(int)mid)
      {
        n=tree2group_file(T, S,0, (int)mid, fname);
        mid=dichotomy((double)n, (double)max_n,(pmid=mid), &bot, &top);
      }
      fprintf ( stderr, "\nDONE2");
      fprintf ( stderr, "\n#TrimTC: Split in %d Groups at a minimum of %d%% ID\n",n, (int)mid);
      return fname;
    }
  return NULL;
}  
static int group_number;
int tree2group_file ( NT_node T,Sequence *S, int maxnseq, int minsim, char *name)
  {
    FILE *fp;

    
    fp=vfopen (name, "w");
    vfclose (tree2group (T, S,maxnseq,minsim, "tree2ngroup",fp));
   
    return count_n_line_in_file(name);
  }
    
      
FILE * tree2group ( NT_node T,Sequence *S, int maxnseq, int minsim,char *name, FILE *fp)
{
  if ( !T)return fp;
  else
    {
      int m,d;

      m=(maxnseq==0)?S->nseq:maxnseq;
      d=minsim;


      
      if ( T->nseq<=m && T->dist>=d)
      {
        int a;
        fprintf ( fp, ">%s_%d ", (name)?name:"", ++group_number);
        for ( a=0; a< T->nseq; a++)
          fprintf ( fp, "%s ", S->name[T->lseq[a]]);
        fprintf (fp, "\n");
        if (!T->parent)group_number=0;
        return fp;
      }
      else
      {
        fp=tree2group (T->right, S, maxnseq, minsim, name,fp);
        fp=tree2group (T->left, S, maxnseq, minsim, name,fp);
        if (!T->parent)group_number=0;
        return fp;
      }
      
    }
}


NT_node  tree2collapsed_tree (NT_node T, int n, char **string)
{
  char ***list;
  Sequence *A;
  int a, *nlist;

  
  A=tree2seq(T, NULL);
  T=recode_tree(T, A);
  list=vcalloc (A->nseq, sizeof (char***));
  nlist=vcalloc (A->nseq, sizeof (int));
  if ( n==0)return T;
  else if (n>1)
    {
      int l;
      char *buf;
      
      for (l=0,a=0; a< n; a++)l+=strlen (string[a]);
      buf=vcalloc ( 2*n+l+1, sizeof (char));
      for (a=0; a< n; a++){buf=strcat (buf,string[a]), buf=strcat ( buf, " ");}     
      list[0]=string2list (buf);
      vfree (buf);
    }
  else if ( file_exists (string[0]))
    {
      list=read_group (string[0]);
    }
  else
    {
      fprintf (stderr, "\nERROR: file <%s> does not exist [FATAL:%s]\n",string[0], PROGRAM);
      myexit (EXIT_FAILURE);
    }  

  
  a=0;
  while (list[a])
    {
      int i, b;
      n=atoi (list[a][0]);
      for (b=0; b<A->nseq; b++)nlist[b]=0;
      for (b=2; b<n; b++)
      {
        i=name_is_in_list (list[a][b], A->name, A->nseq, MAXNAMES);
        nlist[i]=1;
      }
      T=collapse_sub_tree ( T,A->nseq,nlist,list[a][1]);
      free_char (list[a], -1);
      a++;
    }
  vfree (list);
  return T;
}

NT_node collapse_sub_tree ( NT_node T,int nseq, int *list, char *new_name)
{
  if (!T) return T;
  else
    {
      int a=0;
 

      while (a<nseq && list[a]==T->lseq2[a]){a++;} 
      if (a==nseq)
      {
        sprintf ( T->name, "%s", new_name);
        T->leaf=T->isseq=1;
        T->left=T->right=NULL;
        return T;
      }
      else
      {
       collapse_sub_tree (T->right, nseq, list, new_name);
       collapse_sub_tree (T->left, nseq, list, new_name);
       return T;
      }
    }
}
  
/*********************************************************************/
/*                                                                   */
/*                                   tree pruning                    */
/*                                                                   */
/*                                                                   */
/*********************************************************************/
NT_node remove_leaf ( NT_node T);
NT_node prune_root (NT_node T);
NT_node main_prune_tree ( NT_node T, Sequence *S)
{
  T=prune_tree ( T, S);
  return T;
}

NT_node prune_tree ( NT_node T, Sequence *S)
{
  
  if (!T ) return T;
    
  if (T->leaf && T->isseq && name_is_in_list (T->name,S->name, S->nseq, 100)==-1)
    {
      NT_node C, P, PP;
      
      P=T->parent;
      C=(P->right==T)?P->left:P->right;
      PP=C->parent=P->parent;
            
      if (PP && PP->right==P)PP->right=C;
      else if (PP)PP->left=C;
      else
      {
        if (T==P->right)P->right=NULL;
        else P->left=NULL;
        T=C;
        
      }
    }
  else
    {
      prune_tree (T->left, S);
      prune_tree (T->right, S);
    }
  return prune_root(T);
}

NT_node prune_root (NT_node T)
{
  //This function prunes the root if needed (and frees it).
  if (T->parent)return T;
  
  if (!T->right && T->left)
    {
      return prune_root (T->left);
    }
  else if (T->right && !T->left)
     {
      
       return prune_root (T->right);
    }
  else
    {
      return T;
    }
}
/*********************************************************************/
/*                                                                   */
/*                                   tree comparison                 */
/*                                                                   */
/*                                                                   */
/*********************************************************************/
int main_compare_cog_tree (NT_node T1, char *cogfile)
{
  char ***array;
  int a, nbac, n=0, p, c, b;
  Alignment *A;
  
  array=file2list(cogfile, ";\n");
  nbac=atoi(array[0][0])-2;
  
  A=declare_aln2 (nbac+1, 10);
  for (a=0; a<nbac; a++)
    {
      sprintf ( A->name[a], "%s", array[0][a+2]);
      A->seq_al[a][0]='a';
      A->seq_al[a][1]='\0';
      
    }
  sprintf ( A->name[nbac], "cons");
  
  A->nseq=nbac+1;
  A->len_aln=1;
  
  
  n=3;
  while (array[n]!=NULL)
    {
      for (b=0; b<nbac; b++)
      {
        p=atoi (array[1][b+2]);p=(p==0)?'O':'I';
        c=atoi (array[n][b+2]);c=(c==0)?'O':'I';
        A->seq_al[b][0]=p;
        A->seq_al[b][1]=c;
        A->seq_al[b][2]='\0';
       
      }
      sprintf (A->file[0], "%s", array[n][1]);
      A->len_aln=2;
      main_compare_aln_tree (T1, A, stdout);
      n++;
    }
  return n;
}

      
int main_compare_aln_tree (NT_node T1, Alignment *A, FILE *fp)
{
  int n=0;
  
  fprintf ( fp, "\nTOT_CLASH COG %s N %d", A->file[0], compare_aln_tree (T1, A, &n, fp));
  vfclose (fp);
  return n;
}

int compare_aln_tree (NT_node T, Alignment *A, int *n, FILE *fp)
{
  if (T->leaf)
    {
      int i;
      i=name_is_in_list (T->name, A->name, A->nseq, 100);
      T->seqal=A->seq_al[i];
      return 0;
    }
  else
    {
      char *seq1, *seq2;
      if (!(T->left )->seqal)compare_aln_tree (T->left, A,n, fp);
      if (!(T->right)->seqal)compare_aln_tree (T->right, A,n, fp);
      
      seq1=(T->left)->seqal;
      seq2=(T->right)->seqal;
      (T->left)->seqal=(T->right)->seqal=NULL;
      if ( seq1 && seq2)
      {
        if (strm (seq1, seq2))
          {
            T->seqal=seq1;
            
          }
        else
          {
         
            if (seq1[0]!=seq2[0] && seq1[1]!=seq2[1])
            {
              fprintf ( fp, "\nNODE_CLASH: COG %s (%s,%s):(",A->file[0],seq1,seq2 );
              display_leaf_below_node (T->left, fp);
              fprintf ( fp, ");("); 
              display_leaf_below_node (T->right, fp);
              fprintf ( fp, ")"); 
              n[0]++;
            }
          }
      }
    }
  return n[0];
}

int new_compare_trees ( NT_node T1, NT_node T2, int nseq, Tree_sim *TS);
NT_node new_search_split (NT_node T, NT_node B, int nseq);
int new_compare_split ( int *b1, int *b2, int n);

NT_node main_compare_trees ( NT_node T1, NT_node T2, FILE *fp)
{
  Sequence *S1, *S2, *S;
  int n;
  float t1, t2, t, w1, w2,w3, w4, w, l1, l;
  int debug=0, a;
  Tree_sim *TS1, *TS2;
  
  if ( tree_contains_duplicates (T1))
    {
      display_tree_duplicates (T1);
      printf_exit (EXIT_FAILURE, stderr, "\nFirst Tree Contains Duplicated Sequences [main_compare_trees][FATAL:%s]", PROGRAM);
     
    }
  else if ( tree_contains_duplicates (T2))
    {
      display_tree_duplicates (T2);
      printf_exit (EXIT_FAILURE, stderr, "\nSecond Tree Contains Duplicated Sequences [main_compare_trees]");
      
    }
  
  //Identify the commom Sequence Set  S1=tree2seq(T1, NULL);
  S2=tree2seq(T2, NULL);
  S=trim_seq ( S1, S2);
  
  //Prune the trees and recode the subtree list
  T1=prune_tree (T1, S);
  T1=recode_tree(T1, S);
  
  T2=prune_tree (T2, S);  
  T2=recode_tree(T2, S);
  
  if (getenv4debug("DEBUG_TREE_COMPARE")|| debug)display_code (T1, S->nseq, stdout);
  if (getenv4debug("DEBUG_TREE_COMPARE")|| debug)display_code (T2, S->nseq, stdout);
  
  if (getenv4debug("DEBUG_TREE_COMPARE")|| debug)fprintf ( stderr, "\ndisplay 1");
  if (getenv4debug("DEBUG_TREE_COMPARE")|| debug)display_tree ( T1, S->nseq, stderr);
  if (getenv4debug("DEBUG_TREE_COMPARE")|| debug)fprintf ( stderr, "\ndisplay 2");
  if (getenv4debug("DEBUG_TREE_COMPARE")|| debug)display_tree ( T2, S->nseq, stderr);
  
  
  //Compare the trees
  TS1=vcalloc (1, sizeof (Tree_sim));
  TS2=vcalloc (1, sizeof (Tree_sim));
  
  n=new_compare_trees ( T1, T2, S->nseq, TS1);
  new_compare_trees ( T2, T1, S->nseq, TS2);
  
  t=(TS1->uw+TS2->uw)*100/(TS1->max_uw+TS2->max_uw);
  w=(TS1->w+TS2->w)*100/(TS1->max_w+TS2->max_w);
  l=(TS1->d+TS2->d)*100/(TS1->max_d+TS2->max_d);
    
  fprintf ( fp, "\n#tree_cpm|T: %.f W: %.2f L: %.2f", t, w, l);
  fprintf ( fp, "\n#tree_cpm|%d Nodes in T1 with %d Sequences",n+1, S->nseq);
  fprintf ( fp, "\n#tree_cmp|T: ratio of identical nodes");
  fprintf ( fp, "\n#tree_cmp|W: ratio of identical nodes weighted with the min Nseq below node");
  fprintf ( fp, "\n#tree_cmp|L: average branch length similarity\n");
    
  free_sequence (S, -1);
  free_sequence (S1, -1);
  free_sequence (S2, -1);
  
  return T1;
}  

int new_compare_trees ( NT_node T1, NT_node T2, int nseq, Tree_sim *TS)
{
  int n=0;
  NT_node N;
  float t1, t2;
  
  if (!T1 || !T2) return 0;
  
  n+=new_compare_trees (T1->left,  T2, nseq,TS);
  n+=new_compare_trees (T1->right, T2, nseq,TS);
  
  //Exclude arbitrary splits (dist==0)
  if (T1->dist==0)return n;
  
  N=new_search_split (T1, T2, nseq);
  t1=FABS(T1->dist);
  t2=(N)?FABS(N->dist):0;
  TS->max_d+=MAX(t1, t2);

  if (T1->nseq>1)
    {
      int w;
      w=MIN((nseq-T1->nseq),T1->nseq);
      TS->max_uw++;
      TS->max_w+=w;
      
      if (N)
      {
        TS->uw++;
        TS->w+=w;
        TS->d+=MIN(t1, t2);
        //T1->dist=T1->nseq;
      }
      else
      {
        //T1->dist=T1->nseq*-1;
        ;
      }
    }
  else
    {
      TS->d+=MIN(t1, t2);
      //T1->dist=1;
    }
  return ++n;
}
NT_node new_search_split (NT_node T, NT_node B, int nseq)
{
  NT_node N;
  if (!T || !B) return NULL;
  else if ( new_compare_split (T->lseq2, B->lseq2, nseq)==1)return B;
  else if ( (N=new_search_split (T, B->right, nseq)))return N;
  else return new_search_split (T, B->left, nseq);
}
int new_compare_split ( int *b1, int *b2, int n)
{
  int a, flag;
  
  for (flag=1, a=0; a<n; a++)
    if (b1[a]!=b2[a])flag=0;
  if (flag) return flag;
  
  for (flag=1, a=0; a<n; a++)
    if (b1[a]==b2[a])flag=0;
  return flag;
}
  
float compare_trees ( NT_node T1, NT_node T2, int nseq,int  mode)
{
  /*search each branch of T1 in T2*/
  float n=0, s;
  
  
  if ( !T1 || !T2)return 0;
  
  if (getenv4debug("DEBUG_TREE_COMPARE"))display_node (T1, "\nNODE ", nseq);
  
  if (T1->parent && T1->nseq>1)n+=search_node ( T1, T2, nseq, mode);
    
  n+=compare_trees ( T1->left, T2, nseq, mode);
  n+=compare_trees ( T1->right, T2, nseq, mode);
  
  return n;
}

float search_node ( NT_node B, NT_node T, int nseq, int mode)
{
  int n=0;
  if ( !B || !T) return -1;
  if (getenv4debug("DEBUG_TREE_COMPARE"))display_node ( T, "\n\t", nseq);
  
  n=compare_node ( B->lseq2, T->lseq2, nseq );
  
  if ( n==1) 
    {
      if (getenv4debug("DEBUG_TREE_COMPARE"))fprintf ( stderr, "[1][%d]", (int)evaluate_node_similarity ( B, T, nseq, mode));
      if (mode==RECODE)B->dist=B->leaf;
      return evaluate_node_similarity ( B, T, nseq, mode);
    }
  else if ( n==-1)
    {
      if (getenv4debug("DEBUG_TREE_COMPARE"))fprintf ( stderr, "[-1]"); 
      if (mode==RECODE)B->dist=-B->leaf;
      return 0;
    }
  else
    {
      if (getenv4debug("DEBUG_TREE_COMPARE"))fprintf ( stderr, "[0]");
      n=search_node ( B, T->left, nseq, mode);   
      if ( n>0) return n;
      n=search_node ( B, T->right, nseq, mode);
      if ( n>0) return n;
      n=search_node ( B, T->bot, nseq, mode);
      if ( n>0) return n;
    }
  return n;
}

float evaluate_node_similarity ( NT_node B, NT_node T, int nseq, int mode)
{
int a, c;
 
 if ( mode==TOPOLOGY || mode ==RECODE)
   {
     for ( a=0; a< nseq; a++)
       if ( B->lseq2[a]!=T->lseq2[a]) return 0;
     return 1;
   }
 else if ( mode == WEIGHTED)
   {
     for (c=0, a=0; a< nseq; a++)
       {
       if ( B->lseq2[a]!=T->lseq2[a]) return 0; 
       else c+=B->lseq2[a];
       }      
     return (float)(MIN(c,nseq));
   }
 else if ( mode == LENGTH )
   {
     float d1, d2;
     
     for (c=0, a=0; a< nseq; a++)
       {
       if ( B->lseq2[a]!=T->lseq2[a]) return 0; 
       }
     d1=FABS((B->dist-T->dist));
     d2=MAX(B->dist, T->dist);
     return (d2>0)?(d1*100)/d2:0;
   }
 else
   {
     return 0;
   }
}
int compare_node ( int *b1, int *b2, int nseq)
{
  int n1, n2;
  
  n1=compare_node1 ( b1, b2, nseq);
  /*fprintf ( stderr, "[%d]", n1);*/
  if ( n1==1) return 1;
  
  n2=compare_node2 ( b1, b2, nseq);
  /* fprintf ( stderr, "[%d]", n2);*/
  if ( n2==1)return 1;
  else if ( n2==-1 && n1==-1) return -1;
  else return 0;
}
int compare_node1 ( int *b1, int *b2, int n)
{
  int a;
  int l1, l2;
  int r=1;
  for ( a=0; a< n; a++)
    {
      l1=b1[a];
      l2=b2[a];
      if ( l1==1 && l2==0) return -1;
      if ( l1!=l2)r=0;
    }
  return r;
}
int compare_node2 ( int *b1, int *b2, int n)
{
  int a;
  int l1, l2;
  int r=1;
  
  for ( a=0; a< n; a++)
    {
      l1=1-b1[a];
      l2=b2[a];
      if ( l1==1 && l2==0) return -1;
      if ( l1!=l2) r=0;
    }
  return r;
}
void display_node (NT_node N, char *string,int nseq)
{
  int a;
  fprintf ( stderr, "%s", string);
  for (a=0; a< nseq; a++)fprintf ( stderr, "%d", N->lseq2[a]);
}


/*********************************************************************/
/*                                                                   */
/*                                   FJ_tree Computation             */
/*                                                                   */
/*                                                                   */
/*********************************************************************/


NT_node tree_compute ( Alignment *A, int n, char ** arg_list)
{
  if (n==0 || strm (arg_list[0], "cw"))
    {
      return compute_cw_tree (A);
    }
  else if ( strm (arg_list[0], "fj"))
    {
      return compute_fj_tree ( NULL, A, (n>=1)?atoi(arg_list[1]):8, (n>=2)?arg_list[2]:NULL);
    }
  
  else if ( ( strm (arg_list[0], "nj")))
    {
      return compute_std_tree (A, n, arg_list);
    }
  else
    return compute_std_tree (A, n, arg_list);
}

NT_node compute_std_tree (Alignment *A, int n, char **arg_list)
{
  int **s, i;  
  NT_node T;
  char *tree_name;
  
  tree_name =vtmpnam (NULL);
    
  if ( name_is_in_list ("aln",arg_list,n, 3)!=-1)
    {
      s=get_sim_aln_array ( A, "idmat");
    }
  else if ( name_is_in_list ( "sarmat", arg_list,n,10)!=-1)
    {
      s=get_sim_aln_array(A, "sarmat");
    }
  else if ((i=name_is_in_list ("ktup", arg_list,n,4))!=-1)
    {
      ungap_array (A->seq_al, A->nseq);
      s=get_sim_aln_array ( A,arg_list[i] );
    }
  else
    s=get_sim_aln_array ( A, "idmat");
  
  s=sim_array2dist_array(s, 100);
  int_dist2nj_tree (s, A->name, A->nseq, tree_name);
  T=main_read_tree(tree_name);

  if ( name_is_in_list ( "dpa", arg_list,n, 3)!=-1)
    {
      s=dist_array2sim_array(s, 100);
      T=code_dpa_tree (T,s);
    }
  return T;
}
        
NT_node similarities_file2tree (char *mat)
{
  int **s;
  Alignment *A;
  char *tree_name;
  NT_node T;

  

  tree_name =vtmpnam (NULL);
  
  s=input_similarities (mat,NULL, NULL);
  
  
  A=similarities_file2aln(mat);
  s=sim_array2dist_array(s, 100);
 
  
  int_dist2nj_tree (s, A->name, A->nseq, tree_name);
  T=main_read_tree(tree_name);
  free_int (s, -1);
  return T;
}
  
NT_node compute_cw_tree (Alignment *A)
{
  char *tmp1, *tmp2, tmp3[1000];
  char command[1000];
  
  tmp1=vtmpnam (NULL);
  tmp2=vtmpnam (NULL);

  sprintf ( tmp3, "%s.ph", tmp1);
  output_clustal_aln (tmp1, A);
  sprintf ( command, "clustalw -infile=%s -tree -newtree=%s %s ", tmp1,tmp3, TO_NULL_DEVICE);
  my_system ( command);
  sprintf ( command, "mv %s %s", tmp3, tmp2);
  my_system ( command);
  return main_read_tree(tmp2);
}

NT_node compute_fj_tree (NT_node T, Alignment *A, int limit, char *mode)
{
  static int in_fj_tree;
  if (!in_fj_tree)fprintf ( stderr, "\nComputation of an NJ tree using conserved positions\n");
  
  in_fj_tree++;
  if (T && T->leaf<=2);
  else
    {
      T=aln2fj_tree(T,A,limit, mode);
      T->right=compute_fj_tree ( T->right, A, limit, mode);
      T->left=compute_fj_tree  ( T->left, A, limit, mode);
    }
  in_fj_tree--;
  return T;
}


NT_node aln2fj_tree(NT_node T, Alignment *A, int limit_in, char *mode)
{
  NT_node NT;
  Sequence *S=NULL;
  Alignment *subA=NULL;
  int fraction_gap;
  int l, limit;
  
  if (T)
    S=tree2seq (T,NULL);
  else
    S=aln2seq (A);

  
  l=0;
  for ( fraction_gap=100; fraction_gap<=100 && l<1; fraction_gap+=10)
    for ( limit=limit_in; limit>0 && l<1; limit--)
      {
      fprintf ( stderr, "\n%d %d", limit, fraction_gap);
      free_aln (subA);
      subA=extract_sub_aln2 (A,S->nseq,S->name);
      subA=filter_aln4tree (subA,limit,fraction_gap, mode);
      l=subA->len_aln;
      }

  /*  while ( subA->len_aln<1)
    {
    subA=extract_sub_aln2 (A,S->nseq,S->name);
    subA=filter_aln4tree (subA,limit,fraction_gap,mode);
    free_aln (subA);
    subA=extract_sub_aln2 (A,S->nseq,S->name);
    subA=filter_aln4tree (subA,--limit,fraction_gap, mode);
    }
  */
  NT=aln2tree (subA);
  NT=tree2fj_tree (NT);

  NT=realloc_tree (NT,A->nseq); 
  fprintf ( stderr, "Limit:%d Gap: %d Columns: %4d Left: %4d Right %4d BL:%4.2f\n",limit,fraction_gap,  subA->len_aln, (NT->right)->leaf,(NT->left)->leaf, (NT->left)->dist+(NT->right)->dist);
  
  if ( T)
    {
      NT->dist=T->dist;
      NT->parent=T->parent;
    }
  free_tree(T);
  free_aln (subA);
  free_sequence (S, -1);
  return NT;
}  
  
Alignment * filter_aln4tree (Alignment *A, int n,int fraction_gap,char *mode)
{
  char *aln_file;
  char *ungaped_aln_file;
  char *scored_aln_file;
  char *filtered_aln_file;
  char command[1000];

  
  aln_file=vtmpnam(NULL);
  ungaped_aln_file=vtmpnam (NULL);
  scored_aln_file=vtmpnam (NULL);
  scored_aln_file=vtmpnam(NULL);
  filtered_aln_file=vtmpnam(NULL);
  
  

  output_clustal_aln (aln_file, A);
  /* 1: remove columns with too many gaps*/
  sprintf ( command, "t_coffee -other_pg seq_reformat -in %s -action +rm_gap %d -output clustalw > %s", aln_file,fraction_gap, ungaped_aln_file);
  my_system ( command);
  /* 2: evaluate the alignment*/
  
  sprintf ( command, "t_coffee -other_pg seq_reformat -in %s -action +evaluate %s -output clustalw > %s", ungaped_aln_file,(mode)?mode:"categories", scored_aln_file);
  my_system ( command);
  
  /*3 extract the high scoring columns*/
  sprintf ( command, "t_coffee -other_pg seq_reformat -in %s -struc_in %s -struc_in_f number_aln -action +use_cons +keep '[%d-8]' +rm_gap -output clustalw > %s", ungaped_aln_file, scored_aln_file,n, filtered_aln_file);
  my_system ( command);
  
  free_aln (A);
  
  A=main_read_aln ( filtered_aln_file, NULL);
  print_aln (A);
  
  return A;
}

NT_node tree2fj_tree (NT_node T)
{
  NT_node L;
  
  return T;

  L=find_longest_branch (T, NULL);
  T=reroot_tree (T, L);
  return T;
}


/*********************************************************************/
/*                                                                   */
/*                                   Tree Filters and MAnipulation   */
/*                                                                   */
/*                                                                   */
/*********************************************************************/
NT_node aln2tree (Alignment *A)
{
  NT_node **T=NULL;
  

  T=make_nj_tree (A, NULL, 0, 0, A->seq_al, A->name, A->nseq, NULL, NULL);
  tree2nleaf (T[3][0]);
  
  return T[3][0];
}
NT_node realloc_tree ( NT_node R, int n)
{
  if ( !R)return R;
  if ( !R->leaf) 
    {
      R->right=realloc_tree (R->right,n);
      R->left=realloc_tree (R->left,n);
      R->bot=realloc_tree (R->bot,n);
    }
  R->lseq=vrealloc (R->lseq, n*sizeof (int));
  R->lseq2=vrealloc (R->lseq2, n*sizeof (int));
  return R;
}

NT_node reset_boot_tree ( NT_node R, int n)
{
  if ( !R)return R;
  if ( !R->leaf) 
    {
      
      R->right=reset_boot_tree (R->right,n);
      R->left=reset_boot_tree (R->left,n);
      R->bot=reset_boot_tree (R->bot,n);
    }
  R->bootstrap=(float)n;
          
  return R;
}

NT_node reset_dist_tree ( NT_node R, float n)
{
  if ( !R)return R;
  if ( !R->leaf) 
    {
      
      R->right=reset_dist_tree (R->right,n);
      R->left=reset_dist_tree (R->left,n);
      R->bot=reset_dist_tree (R->bot,n);
    }
  if (R->parent && !(R->parent)->parent && !(R->parent)->bot)R->dist=n/2;
  else R->dist=n;

  return R;
}

NT_node free_tree ( NT_node R)
{
  if ( !R)return R;
  if ( R->leaf!=1) 
    {
      R->right=free_tree (R->right);
      R->left=free_tree (R->left);
      R->bot=free_tree (R->bot);
    }
  free_tree_node (R);
  return R;
}

NT_node free_tree_node ( NT_node R)
{
  vfree ( R->name);
  vfree ( R->lseq); vfree ( R->lseq2);
  vfree (R);
  return NULL;
}

NT_node   rename_seq_in_tree ( NT_node R, char ***list)
{
  if ( !R || !list) return R;

  if ( R->leaf!=1) 
    {
      R->right=rename_seq_in_tree (R->right, list);
      R->left=rename_seq_in_tree (R->left, list);
      R->bot=rename_seq_in_tree (R->bot, list);
    }
  else
    {
      int n=0;
      while ( list[n][0][0])
      {
        if ( strm (list[n][0], R->name))sprintf (R->name, "%s",list[n][1]);
        n++;
      }
    }
  return R;
}
Sequence * tree2seq    (NT_node R, Sequence *S)
{

  if ( !R)return S;
  if ( !S)
    {
      S=declare_sequence (10, 10, tree2nseq (R));
      S->nseq=0;
    }
  
  if (R->leaf==1)
    {
      sprintf ( S->name[S->nseq++], "%s", R->name);
    }
  else
    {
      S=tree2seq (R->left, S);
      S=tree2seq (R->right, S);
      S=tree2seq (R->bot, S);
    }
  return S;
}

NT_node balance_tree (NT_node T)
{
  static int **list;
  NT_node NL[3];
  
  if ( !T) return T;
  else if ( T->leaf<=2)return T;
  else    
    {
      if (!list)list=declare_int (3, 2);
      
      NL[0]=T->left;
      NL[1]=T->right;
      NL[2]=T->bot;
      
      list[0][0]=(T->left)?(T->left)->leaf:0;
      list[0][1]=0;
      list[1][0]=(T->right)?(T->right)->leaf:0;
      list[1][1]=1;
      list[2][0]=(T->bot)?(T->bot)->leaf:0;
      list[2][1]=2;
      
      sort_int (list,2,0,0,2);
      
      T->left=NL[list[2][1]];
      T->right=NL[list[1][1]];
      T->bot=NL[list[0][1]];
      
      T->left=balance_tree (T->left);
      T->right=balance_tree (T->right);
      T->bot=balance_tree (T->bot);
      return T;   
    }
}
FILE * display_tree (NT_node R, int nseq, FILE *fp)
{
  int a;
  
  if ( !R);
  else 
    {
      /*
      if ( R->nseq==1)fprintf (stderr,"\n[%s] ", R->name);
      else fprintf ( stderr, "\n[%d Node] ",R->nseq);
      for ( a=0; a< R->nseq; a++) fprintf ( stderr, "[%d]", R->lseq[a]);
      */
      fprintf (fp, "\n %10s N ", R->name);
      for ( a=0; a< nseq; a++)fprintf (fp, "%d", R->lseq2[a]);
      fprintf (fp, "\n %10s D ", R->name);
      for ( a=0; a< nseq; a++)fprintf (fp, "%d", R->idist[a]);
      
      
      if (R->leaf==1) fprintf (fp, " %s", R->name);
      fprintf (fp, " :%.4f", R->dist);
      HERE ("\nGo Left");fp=display_tree (R->left, nseq, fp);
      HERE ("\nGo Right");fp=display_tree (R->right, nseq, fp);
      HERE ("\nGo Bot");fp=display_tree (R->bot, nseq, fp);
    }
  return fp;
}

int tree2nnode ( NT_node R)
{
  int n;
  if ( !R)n=0;
  else if ( R->leaf==1){R->node=1;n=1;}
  else 
    {
      n=1;
      n+=tree2nnode (R->right);
      n+=tree2nnode (R->left);
      n+=tree2nnode (R->bot);
      R->node=n;
    }
  return n;
}
int tree2nleaf (NT_node R)
{
  if ( !R)return 0;
  else if (R->leaf==1){return 1;}
  else if (R->right==NULL && R->left==NULL && R->bot==NULL){R->leaf=1; return 1;}
  else
    {
      int n=0;
      n+=tree2nleaf (R->right);
      n+=tree2nleaf (R->left);
      n+=tree2nleaf (R->bot);
      
      R->leaf=n;
      return n;
    }
}


int tree2nseq ( NT_node R)
{
 return tree2nleaf(R);
}

int tree_file2nseq (char *fname)
{
  FILE *fp;
  char *string;
  int p, a, b, c, n;
  
  string=vcalloc (count_n_char_in_file(fname)+1, sizeof (char));
  
  fp=vfopen (fname, "r");
  n=0;
  while ( (c=fgetc(fp))!=EOF){if (c=='(' || c==')' || c==',' || c==';') string[n++]=c;}
  vfclose (fp);string[n]='\0';
  
  for (n=0, p=1; p<strlen (string); p++)
    {
      a=string[p-1];
      b=string[p];

      if ( a=='(' && b==',')n++;
      else if ( a==',' && b==')')n++;
      else if ( a==',' && b==',')n++;
    }
  if ( getenv4debug("DEBUG_TREE"))fprintf (stderr, "\n[DEBUG_TREE:tree_file2nseq]%s", string);
  vfree (string);
  return n;
}
  

void clear_tree ( NT_node R)
{
  if (!R) return;
  
  R->visited=0;
  
  if ( R->leaf==1);
  else
    {
      clear_tree ( R->right);
      clear_tree ( R->left);
      clear_tree ( R->bot);
    }
}
int display_leaf_below_node (NT_node T, FILE *fp)
{
  int n=0;
  if ( !T)return 0;
  
  if ( T->leaf==1)
    {
      fprintf (fp, " %s", T->name);
      return 1;
    }
  else
    {
      n+=display_leaf_below_node ( T->right, fp);
      n+=display_leaf_below_node ( T->left, fp);
      return n;
    }
}
int display_leaf ( NT_node T, FILE *fp)
{
  int n=0;
  if ( !T)return 0;
  else if ( T->visited)return 0;
  else T->visited=1;
  
  if ( T->leaf==1)
    {
      fprintf (fp, " %s", T->name);
      return 1;
    }
  else
    {
      n+=display_leaf ( T->right, fp);
      n+=display_leaf ( T->left, fp);
      n+=display_leaf ( T->bot, fp);
      return n;
    }
}

NT_node find_longest_branch ( NT_node T, NT_node L)
  {
    
    if ( !L || T->dist>L->dist)
      {
      
      L=T;
      }

    if ( T->leaf==1)return L;
    else 
      {
      L=find_longest_branch ( T->right, L);
      L=find_longest_branch ( T->left,  L);
      return L;
      }
  }
int node2side (NT_node N);
int test_print (NT_node T);
NT_node straighten_node (NT_node N);
NT_node EMPTY;
NT_node Previous;
NT_node reroot_tree ( NT_node TREE, NT_node Right)
{
  /*ReRoots the tree between Node R and its parent*/
  NT_node NR;
  int n1, n2;
  
  if (!EMPTY)EMPTY=vcalloc (1, sizeof (NT_node));
  if ( !Right->parent)return Right;
    
  TREE=unroot_tree (TREE);
  if (Right->parent==NULL && Right->bot)
    Right=Right->bot;
  
  n1=tree2nleaf (TREE);
  
  NR=declare_tree_node(TREE->maxnseq);
  
  NR->right=Right;
  NR->left=Right->parent;
  Right->parent=NR;
  
  Right->dist=Right->dist/2;
  
  if ((NR->left)->right==Right)(NR->left)->right=EMPTY;
  else if ( (NR->left)->left==Right) (NR->left)->left=EMPTY;
  
  Previous=NULL;
  
  
  NR->left=straighten_node (NR->left);
  
  
  
  (NR->left)->parent=NR;
  (NR->left)->dist=Right->dist;
  

     
  n2=tree2nleaf(NR);
  
  if ( n1!=n2){fprintf ( stderr, "\n%d %d", n1, n2);myexit (EXIT_FAILURE);}
  return NR;
}

NT_node straighten_node ( NT_node N)
{
  NT_node Child;
  
  
  if ( N->parent)
    {
      if (N->right==EMPTY)N->right=N->parent;
      else if ( N->left==EMPTY) N->left=N->parent;
      
      Child=N->parent;
      if (Child->right==N)
      {
        Child->right=EMPTY;
      }
      else if (Child->left==N)
      {
        Child->left=EMPTY;
      }
      
      Previous=N;
      Child=straighten_node (Child);
      Child->parent=N;
      Child->dist=N->dist;
      return N;
    }
  else if ( N->bot && N->bot!=Previous)
    {
      if ( N->right==EMPTY)N->right=N->bot;
      else if ( N->left==EMPTY)N->left=N->bot;
      
      N->bot=NULL;
      return N;
    }
  else
    {
      N->bot=NULL;
      return N;
    }
}
int test_print (NT_node T)
{
  if ( !T) 
    {
      fprintf ( stderr, "\nEMPTY");
    }
  else if ( !T->left && !T->right)
    {
      fprintf ( stderr, "\n%s",T->name);
    }
  else
    {
      fprintf ( stderr, "\nGoing Right");
      test_print (T->right);
      fprintf ( stderr, "\nGoing Left");
      test_print (T->left);
    }
  return 1;
}
int node2side (NT_node C)
{
  if ( !C->parent) return UNKNOWN;
  else if ( (C->parent)->left==C)return LEFT;
  else if ( (C->parent)->right==C)return RIGHT;
  else return UNKNOWN;
}
NT_node straighten_tree ( NT_node P, NT_node C, float new_dist)
{
  float dist;
  
  if ( C==NULL)return NULL;


  dist=C->dist;
  C->dist=new_dist;
  C->bot=NULL;
  
  if (C->left && C->right)
    {
      C->parent=P;
    }
  else if (!C->left)
    {
      C->left=C->parent;
      C->parent=P;
    }

  if ( C->parent==P);
  else if ( C->left==NULL && C->right==NULL)
    {
      C->parent=P;
    }
  else if ( C->right==P)
    {
      C->right=C->parent;
      C->parent=P;

      C=straighten_tree(C, C->right, dist);
    }
  else if ( C->left==P)
    {
      C->left=C->parent;
      C->parent=P;
      C=straighten_tree (C, C->left, dist);
    }
  else if ( C->parent==NULL)
    {
      C->parent=P;
    }
  
  return C;
}


NT_node unroot_tree ( NT_node T)
{
  
  if (!T || T->visited) return T;
  else T->visited=1;
  
  if (T->parent==NULL)
    {      
      float dist;
      (T->right)->dist=(T->left)->dist=(T->right)->dist+(T->left)->dist;
      (T->right)->parent=T->left;
      (T->left)->parent=T->right;
      T=T->left;
      T->leaf=0;
      vfree (T->parent);
    }
  else
    {
      T->parent=unroot_tree (T->parent);
      T->right=unroot_tree (T->right);
      T->left=unroot_tree (T->left);
    }
  T->visited=0;
  return T;
}

FILE * print_tree ( NT_node T, char *format,FILE *fp)
{
  Sequence *S;
  
  tree2nleaf(T);
  S=tree2seq(T, NULL);
  recode_tree (T, S);
  
  
  if ( strm (format, "binary"))
       fp=display_tree ( T,S->nseq, fp);
  else if ( strm2 (format, "newick_tree","newick"))
    {
      /*T=balance_tree (T);*/
      fp=rec_print_tree (T, fp);
      fprintf ( fp, ";\n");
    }
  else
    {
      fprintf ( stderr, "\nERROR: %s is an unknown tree format [FATAL:%s]\n", format, PROGRAM);
      myexit (EXIT_FAILURE);
    }
  return fp;
}
FILE * rec_print_tree ( NT_node T, FILE *fp)
{
  
  if (T==NULL || T->visited==1)
    return fp;
  else 
    T->visited=1;
  
  if ( T->isseq)
    {
      fprintf ( fp, " %s:%.5f",T->name, T->dist);
    }
  else
    {
      fprintf ( fp, "(");fp=rec_print_tree ( T->left, fp);
      fprintf ( fp, ",");fp=rec_print_tree ( T->right, fp);
      if ( T->bot){fprintf ( fp, ",");fp=rec_print_tree ( T->bot, fp);}
      fprintf ( fp, ")");
      if (T->parent || T->dist)
      {
        if ( T->bootstrap!=0)fprintf (fp, " %.2f", T->bootstrap);
        fprintf (fp, ":%.5f", T->dist);
      }
    }
  return fp;
}





/*********************************************************************/
/*                                                                   */
/*                                  Tree Functions                   */
/*                                                                   */
/*                                                                   */
/*********************************************************************/

int ** make_sub_tree_list ( NT_node **T, int nseq, int n_node)
        {


/*This function produces a list of all the sub trees*/
       

/*      /A */
/*    -* */
/*      \      /B */
/*       \    / */
/*        ---* */
/*                  \ */
/*                   *--C */
/*                    \ */
/*                     \D */
      
/*    Contains 4 i_nodes */
/*           8   nodes (internal nodes +leaves) */
/*           8 sub trees: */
/*           ABCD */
/*                 1111 */
/*           0111 */
/*           1000 */
/*           0100 */
/*           0011 */
/*           0001 */
/*           0010 */
       
      int **sub_tree_list;
      int a, n=0;
      
      
      if (T)
           {
             sub_tree_list=declare_int ( (n_node), nseq);
             make_all_sub_tree_list (T[3][0],sub_tree_list, &n);

           }
      else
           {
             sub_tree_list=declare_int (nseq, nseq);
             for ( a=0; a< nseq; a++)sub_tree_list[a][a]=1;
           }
      
      return sub_tree_list;
      }

void make_all_sub_tree_list ( NT_node N, int **list, int *n)
        {
      make_one_sub_tree_list (N, list[n[0]++]);
      if (N->leaf!=1)
          {
          make_all_sub_tree_list (N->left , list, n);
          make_all_sub_tree_list (N->right, list, n);
          }
      return;
      }

void make_one_sub_tree_list ( NT_node T,int *list)
        {
      if (T->leaf==1)
          {
          
          list[T->seq]=1;
          }
      else
          {
          make_one_sub_tree_list(T->left , list);
          make_one_sub_tree_list(T->right, list);
          }
      return;
      }


NT_node old_main_read_tree(char *treefile)
{
  /*Reads a tree w/o needing the sequence file*/
  NT_node **T;
  T=simple_read_tree (treefile);
  return T[3][0];
}



NT_node** simple_read_tree(char *treefile)
{
  int tot_node=0;
  NT_node **T;
  T=read_tree ( treefile, &tot_node,tree_file2nseq (treefile),NULL);
  return T;
}

NT_node** read_tree(char *treefile, int *tot_node,int nseq, char **seq_names)
      {

          /*The Tree Root is in the TREE[3][0]...*/
          /*TREE[0][ntot]--> pointer to each node and leave*/
      char ch;
      int  a,b;

      FILE *fp;
      int nseq_read = 0;
      int nnodes = 0;/*Number of Internal Nodes*/
      int ntotal = 0;/*Number of Internal Nodes + Number of Leaves*/
      int flag;
      int c_seq;
      NT_node **lu_ptr;
      NT_node seq_tree, root,p;


      tot_nseq=nseq;
      rooted_tree=distance_tree=TRUE;
      
      fp = vfopen(treefile, "r");
      fp=skip_space(fp);
      ch = (char)getc(fp);
      if (ch != '(')
            {
            fprintf(stderr, "Error: Wrong format in tree file %s\n", treefile);
                  myexit (EXIT_FAILURE);
            }
      rewind(fp);
  
      
      lu_ptr=(NT_node **)vcalloc(4,sizeof(NT_node*));
      lu_ptr[0] = (NT_node *)vcalloc(10*nseq,sizeof(NT_node));
      lu_ptr[1] = (NT_node *)vcalloc(10*nseq,sizeof(NT_node));
      lu_ptr[2] = (NT_node *)vcalloc(10*nseq,sizeof(NT_node));
      lu_ptr[3] =(NT_node *) vcalloc(1,sizeof(NT_node));
      
      seq_tree =(NT_node) declare_tree_node(nseq);
  
      set_info(seq_tree, NULL, 0, "  ", 0.0, 0);
      

      fp=create_tree(seq_tree,NULL,&nseq_read, &ntotal, &nnodes, lu_ptr, fp);
      fclose (fp);
            
      
      if (nseq != tot_nseq)
            {
            fprintf(stderr," Error: tree not compatible with alignment (%d sequences in alignment and %d in tree\n", nseq,nseq_read);
            myexit (EXIT_FAILURE);
            }
      if (distance_tree == FALSE)
            {
            if (rooted_tree == FALSE)
                  {
                        fprintf(stderr,"Error: input tree is unrooted and has no distances, cannot align sequences\n");
                        myexit (EXIT_FAILURE);
                  }
            }
        if (rooted_tree == FALSE)
            {

              root = reroot(seq_tree, nseq,ntotal,nnodes, lu_ptr);
              lu_ptr[1][nnodes++]=lu_ptr[0][ntotal++]=root;
              
            }
      else
            {
            root = seq_tree;
            }     
      lu_ptr[3][0]=root;
      tot_node[0]=nnodes;
      
      
      
      for ( a=0; a< ntotal; a++)
            {
            (lu_ptr[0][a])->isseq=(lu_ptr[0][a])->leaf;
            (lu_ptr[0][a])->dp=(lu_ptr[0][a])->dist;
            }
            
      
      for ( a=0; a< nseq; a++)
            {
            if (!seq_names)
              {
                flag=1;
                (lu_ptr[2][a])->order=(lu_ptr[2][a])->seq=a;
              }
            else
              {
                for ( flag=0,b=0; b<nseq; b++)
                  {                  
                  if ( strncmp ( (lu_ptr[2][a])->name, seq_names[b], MAXNAMES)==0)
                    {
                      flag=1;
                      
                      (lu_ptr[2][a])->order=(lu_ptr[2][a])->seq=b;
                        /*vfree ( (lu_ptr[2][a])->name);*/
                      sprintf ((lu_ptr[2][a])->name, "%s", seq_names[b]);
                    }
                  }
              }
            /*
            if ( flag==0  && (lu_ptr[0][a])->leaf==1)
                  {
                  fprintf ( stderr, "\n%s* not in tree",(lu_ptr[2][a])->name);
                  for ( a=0; a< ntotal; a++)
                    {
                      fprintf ( stderr, "\n%d %s",(lu_ptr[2][a])->leaf, (lu_ptr[2][a])->name);
                    } 
                  }
            */
            }
      
      if (seq_names)
        {
          int tnseq;
          char *s;
          char **tree_names;
          int fail_flag=0;
          tnseq=tree_file2nseq(treefile);
          tree_names=vcalloc ( tnseq, sizeof (char*));
          for (a=0; a<tnseq; a++)
            {
            s=(lu_ptr[2][a])->name;
            tree_names[a]=s;
            if ( name_is_in_list(s, seq_names, nseq, MAXNAMES+1)==-1)
              {
                fprintf (stderr, "\nERROR: Sequence %s in the tree [%s] is not in the alignment[FATAL:%s]\n", s, treefile, PROGRAM);
                fail_flag=1;
              }
            }
          for (a=0; a<nseq; a++)
            {
            s=seq_names[a];
            if ( name_is_in_list(s, tree_names, nseq, MAXNAMES+1)==-1)
              {
                fprintf (stderr, "\nERROR: Sequence %s in the sequences is not in the tree [%s][FATAL:%s]\n", s, treefile, PROGRAM);
                fail_flag=1;
              }
            }
          vfree (tree_names);
          if ( fail_flag==1)exit (EXIT_FAILURE);
        }
          
      for ( a=0; a< nseq; a++)
            {
            p=lu_ptr[2][a];
            c_seq=p->seq;
            
            while ( p!=NULL)
                  {
                  p->lseq[p->nseq]=c_seq;
                  p->nseq++;
                  p=p->parent;
                  }
            }
      
      
      return lu_ptr;    
      }


FILE * create_tree(NT_node ptree, NT_node parent,int *nseq,int  *ntotal,int  *nnodes,NT_node **lu, FILE *fp)
      {

      int i, type;
      float dist=0;
      float bootstrap=0;
      char *name;
      int ch;
      

      name=vcalloc ( MAXNAMES+1, sizeof (char));
      sprintf ( name, "   ");
      fp=skip_space(fp);
      ch = (char)getc(fp);
      
      if (ch == '(')
            {
            type = NODE;
                  name[0] = '\0';
                  lu[0][ntotal[0]] = lu[1][nnodes[0]] = ptree;
                  ntotal[0]++;
                  nnodes[0]++;
                  create_tree_node(ptree, parent);
                  fp=create_tree(ptree->left, ptree, nseq,ntotal,nnodes,lu,fp);
            ch = (char)getc(fp);
                if ( ch == ',')
                        {
                  fp=create_tree(ptree->right, ptree,nseq,ntotal,nnodes,lu,fp);
                  ch = (char)getc(fp);
                  if ( ch == ',')
                              {
                              
                              ptree = insert_tree_node(ptree);
                              lu[0][ntotal[0]] = lu[1][nnodes[0]] = ptree;
                              ntotal[0]++;
                              nnodes[0]++;
                              fp=create_tree(ptree->right, ptree,nseq,ntotal,nnodes,lu,fp);
                              rooted_tree = FALSE;
                        if ( getenv4debug ( "DEBUG_TREE")){fprintf ( stderr, "\n[DEBUG_TREE:create_tree] Unrooted Tree");}
                              }
                        }

                  fp=skip_space(fp);
                  ch = (char)getc(fp);
            }
      else
            {
            type=LEAF;
                  lu[0][ntotal[0]] = lu[2][nseq[0]] = ptree;
                  ntotal[0]++;
                  nseq[0]++;
                  name[0] = ch;
                  i=1;
                  ch = (char)getc(fp);
            if ( name[0]=='\'')
              {
                /*This protects names that are between single quotes*/
                while ( ch!='\'')
                  {
                  if (i < MAXNAMES) name[i++] = ch;
                  ch = (char)getc(fp);
                  }
                if (i < MAXNAMES) name[i++] = ch;
                while ((ch != ':') && (ch != ',') && (ch != ')'))ch = (char)getc(fp);
              }
            else
              {
                while ((ch != ':') && (ch != ',') && (ch != ')'))
                  {
                  if (i < MAXNAMES) name[i++] = ch;
                  ch = (char)getc(fp);
                  }
              }
            
                  name[i] = '\0';
      
                  if ( i>=(MAXNAMES+1)){fprintf (stderr, "\nName is too long");myexit (EXIT_FAILURE);}
                  if (ch != ':' && !isdigit(ch))
                  {
                    /*distance_tree = FALSE*/;
                  }
            }
      if (ch == ':')
            {
                  fp=skip_space(fp);
                  fscanf(fp,"%f",&dist);
                  fp=skip_space(fp);
            bootstrap=0;
                  }
      /*Tree with Bootstrap information*/
      else if (isdigit (ch))
        {
          ungetc(ch,fp);
          fscanf(fp,"%f",&bootstrap);
          if ( fscanf(fp,":%f",&dist)==1);
          else dist=0;
          fp=skip_space(fp);
        }
      else
        {
          ungetc ( ch, fp);
          skip_space(fp);
        }

      set_info(ptree, parent, type, name, dist, bootstrap);
      
      
      vfree (name);
      return fp;
      }

NT_node declare_tree_node (int nseq)
      {
      NT_node p;
      
      p= (NT_node)vcalloc (1, sizeof ( Treenode)); 
      p->left = NULL;
      p->right = NULL;
      p->parent = NULL;
      p->dist = 0.0;
      p->leaf = 0;
      p->order = 0;
      p->maxnseq=nseq;
      p->name=(char*)vcalloc (MAXNAMES+1,sizeof (char));
      p->name[0]='\0';
      p->lseq=(int*)vcalloc ( nseq, sizeof (int));
      return p;
      
      }

void set_info(NT_node p, NT_node parent, int pleaf, char *pname, float pdist, float bootstrap)
      {
      p->parent = parent;
      p->leaf = pleaf;
      p->dist = pdist;
      p->bootstrap=bootstrap;
      p->order = 0;
      

      sprintf (p->name, "%s", pname);
      
      if (pleaf ==1)
            {
            p->left = NULL;
            p->right = NULL;
            }

   }
NT_node insert_tree_node(NT_node pptr)
      {

      NT_node newnode;

      newnode = declare_tree_node( pptr->maxnseq);
      create_tree_node(newnode, pptr->parent);

      newnode->left = pptr;
      pptr->parent = newnode;

      set_info(newnode, pptr->parent, 0, "", 0.0, 0);

      return(newnode);
      }

void create_tree_node(NT_node pptr, NT_node parent)
      {
      pptr->parent = parent;
      pptr->left =declare_tree_node(pptr->maxnseq) ;
      (pptr->left)->parent=pptr;
      
      pptr->right =declare_tree_node(pptr->maxnseq) ;    
      (pptr->right)->parent=pptr;
      }     

FILE * skip_space(FILE *fp)
      {
      int   c;
  
      do
      c = getc(fp);
      while(isspace(c));
      if ( c==EOF)
            {
            fprintf ( stderr, "\nEOF");
            myexit (EXIT_FAILURE);
            }
      ungetc(c, fp);
      return fp;
      }


NT_node reroot(NT_node ptree, int nseq, int ntotal, int nnodes, NT_node **lu)
      {
      NT_node p, rootnode, rootptr;
      float   diff, mindiff=0, mindepth = 1.0, maxdist;
      int   i;
      int first = TRUE;
      
      
      
      rootptr = ptree;
      
      
      for (i=0; i<ntotal; i++)
            {
            p = lu[0][i];
            if (p->parent == NULL)
                  diff = calc_root_mean(p, &maxdist, nseq, lu);
            else
                  diff = calc_mean(p, &maxdist, nseq, lu);

            if ((diff == 0) || ((diff > 0) && (diff < 2 * p->dist)))
                   {
                        if ((maxdist < mindepth) || (first == TRUE))
                        {
                              first = FALSE;
                              rootptr = p;
                              mindepth = maxdist;
                              mindiff = diff;
                        }
                  }

            }
      
      if (rootptr == ptree)
            {
            mindiff = rootptr->left->dist + rootptr->right->dist;
            rootptr = rootptr->right;
            }
      
      rootnode = insert_root(rootptr, mindiff);
      
      diff = calc_root_mean(rootnode, &maxdist, nseq, lu);

      return(rootnode);
      }


float calc_root_mean(NT_node root, float *maxdist, int nseq, NT_node **lu)
      {
      float dist , lsum = 0.0, rsum = 0.0, lmean,rmean,diff;
      NT_node p;
      int i;
      int nl, nr;
      int direction;
      
   
      dist = (*maxdist) = 0;
      nl = nr = 0;
      for (i=0; i< nseq; i++)
        {
          p = lu[2][i];
          dist = 0.0;
          while (p->parent != root)
            {
                  dist += p->dist;
                  p = p->parent;
            }
         if (p == root->left) direction = LEFT;
         else direction = RIGHT;
         dist += p->dist;

         if (direction == LEFT)
            {
            lsum += dist;
                  nl++;
            }
         else
            {
                  rsum += dist;
                  nr++;
            }
        
        if (dist > (*maxdist)) *maxdist = dist;
      }

      lmean = lsum / nl;
      rmean = rsum / nr;

      diff = lmean - rmean;
      return(diff);
      }

float calc_mean(NT_node nptr, float *maxdist, int nseq,NT_node **lu)
      {
      float dist , lsum = 0.0, rsum = 0.0, lmean,rmean,diff;
      NT_node p, *path2root;
      float *dist2node;
      int depth = 0, i,j , n;
      int nl , nr;
      int direction, found;
      
      
      path2root = (NT_node *)vcalloc(nseq,sizeof(Treenode));
      dist2node = (float *)vcalloc(nseq,sizeof(float));

      depth = (*maxdist) = dist = 0;
      nl = nr = 0;
      p = nptr;
      while (p != NULL)
            {
            path2root[depth] = p;
            dist += p->dist;
            dist2node[depth] = dist;
            p = p->parent;
            depth++;
            }
 
/***************************************************************************
   *nl = *nr = 0;
   for each leaf, determine whether the leaf is left or right of the node.
   (RIGHT = descendant, LEFT = not descendant)
****************************************************************************/
      for (i=0; i< nseq; i++)
            {
                  p = lu[2][i];
                  if (p == nptr)
                  {
                        direction = RIGHT;
                        dist = 0.0;
                  }
                  else
                  {
                  direction = LEFT;
                  dist = 0.0;
                  
                  found = FALSE;
                  n = 0;
                  while ((found == FALSE) && (p->parent != NULL))
                        {
                              for (j=0; j< depth; j++)
                              if (p->parent == path2root[j])
                                          { 
                                          found = TRUE;
                                          n = j;
                                          }
                              dist += p->dist;
                              p = p->parent;
                        }
         
                  if (p == nptr) direction = RIGHT;
        
                  }
            if (direction == LEFT)
                  {
                        lsum += dist;
                        lsum += dist2node[n-1];
                        nl++;
                  }
            else
                  {
                        rsum += dist;
                        nr++;
                  }

            if (dist > (*maxdist)) *maxdist = dist;
            }

   vfree(dist2node);
   vfree(path2root);

   

   if ( nl==0 || nr==0)
     {
       myexit (EXIT_FAILURE);
     }
   lmean = lsum / nl;
   rmean = rsum / nr;
   
   diff = lmean - rmean;
   return(diff);
}

NT_node insert_root(NT_node p, float diff)
{
   NT_node newp, prev, q, t;
   float dist, prevdist,td;

   
   newp = declare_tree_node( p->maxnseq);

   t = p->parent;
   prevdist = t->dist;

   p->parent = newp;

   dist = p->dist;

   p->dist = diff / 2;
   if (p->dist < 0.0) p->dist = 0.0;
   if (p->dist > dist) p->dist = dist;

   t->dist = dist - p->dist; 

   newp->left = t;
   newp->right = p;
   newp->parent = NULL;
   newp->dist = 0.0;
   newp->leaf = NODE;

   if (t->left == p) t->left = t->parent;
   else t->right = t->parent;

   prev = t;
   q = t->parent;

   t->parent = newp;

   while (q != NULL)
     {
        if (q->left == prev)
           {
              q->left = q->parent;
              q->parent = prev;
              td = q->dist;
              q->dist = prevdist;
              prevdist = td;
              prev = q;
              q = q->left;
           }
        else
           {
              q->right = q->parent;
              q->parent = prev;
              td = q->dist;
              q->dist = prevdist;
              prevdist = td;
              prev = q;
              q = q->right;
           }
    }

/*
   remove the old root node
*/
   q = prev;
   if (q->left == NULL)
      {
         dist = q->dist;
         q = q->right;
         q->dist += dist;
         q->parent = prev->parent;
         if (prev->parent->left == prev)
            prev->parent->left = q;
         else
            prev->parent->right = q;
         prev->right = NULL;
      }
   else
      {
         dist = q->dist;
         q = q->left;
         q->dist += dist;
         q->parent = prev->parent;
         if (prev->parent->left == prev)
            prev->parent->left = q;
         else
            prev->parent->right = q;
         prev->left = NULL;
      }

   return(newp);
}




/*********************************************************************/
/*                                                                   */
/*                                  TrimTC3                          */
/*                                                                   */
/*                                                                   */
/*********************************************************************/

int *aln2seq_chain (Alignment *A, int **sim,int seq1, int seq2, int limit, int max_chain);

Alignment *seq2seq_chain (Alignment *A,Alignment*T, char *arg)
{
  int **sim=NULL;
  int *buf=NULL, *seq2keep, *list, *tname;
  int a, b, c, nl;
  int sim_limit;
  int min_sim=15;
  int max_chain=20;

  /*Estimate Similarity within the incoming sequences*/
  sim=seq2comp_mat (aln2seq(A), "blosum62mt", "sim2");
  
  /*Read and store the list of sequences to keep*/
  seq2keep=vcalloc (A->nseq, sizeof (int));
  tname=vcalloc (T->nseq, sizeof (int));
  for ( a=0; a< T->nseq; a++)
    {
      tname[a]=name_is_in_list ( T->name[a], A->name, A->nseq, 100);
      if (tname[a]>=0)seq2keep[tname[a]]=1;
    }
  
  /*Consider Every Pair of Sequences within the list of sequences to keep*/

  fprintf ( stderr, "\n");
  for ( a=0; a< T->nseq-1; a++)
    {
      if (tname[a]<0) continue;
      for ( b=a+1;b<T->nseq; b++)
      {

        if (tname[b]<0) continue;

        buf=NULL;sim_limit=90;
        while (!buf && sim_limit>min_sim)
          {
            buf=aln2seq_chain ( A, sim,tname[a],tname[b],sim_limit, max_chain);
            sim_limit-=5;
          }
        
        if ( buf)
          {
            for (c=0; c< A->nseq; c++)seq2keep[c]+=buf[c];
            vfree (buf);
          }
        else
          {
            fprintf ( stderr, "\n#Could Not Find any Intermediate sequence [MAx chain %d MinID %d\n", max_chain, min_sim);
          }
      }
    }
  
  list=vcalloc (A->nseq, sizeof (int));
  for ( nl=0,a=0; a< A->nseq; a++)
    if ( seq2keep[a])
      list[nl++]=a;
  
  A=extract_sub_aln (A, nl, list);
  
  free_int (sim, -1);
  vfree (list);
  return A;
}
int max_explore=10000000;/*Limits the number of explorations that tends to increase when id is small*/
int n_explore;

int *aln2seq_chain (Alignment *A, int **sim, int seq1, int seq2, int limit, int max_chain)
{
  int *used;
  int **chain;
  char output1[10000];
  char output2[10000];
  int a;
  int *list;
  int n, nseq=0;

  
  output1[0]=output2[0]='\0';
  used=vcalloc (A->nseq, sizeof(int));
  used[seq1]=1;
  
  if (find_seq_chain ( A, sim,used,seq1,seq1, seq2,1,limit, max_chain, &nseq))
    {
      list=vcalloc (A->nseq, sizeof (int));
      chain=declare_int (A->nseq, 2);
      for (n=0, a=0; a< A->nseq; a++)
      {
        if ( used[a])
          {
            chain[n][0]=used[a];
            chain[n][1]=a;
            list[used[a]-1]=a;n++;
          }
      }
      
      sprintf ( output2, "#%s %s N: %d Lower: %d Sim: %d DELTA: %d\n", A->name[list[0]], A->name[list[n-1]],n, limit,sim[list[0]][list[n-1]],limit-sim[list[0]][list[n-1]]);strcat (output1, output2);
      
      sort_int ( chain, 2, 0, 0, n-1);
      sprintf ( output2, "#");strcat(output1, output2);
      
      for ( a=0; a< n-1; a++)
      {
        sprintf (output2, "%s -->%d -->", A->name[chain[a][1]],sim[chain[a][1]][chain[a+1][1]]);strcat ( output1, output2);
      }
      sprintf ( output2, "%s\n", A->name[chain[n-1][1]]);strcat (output1, output2);
      
      free_int (chain, -1);
      vfree (list);
    }
  else
    {
      vfree (used);
      used=NULL;
    }
  /*  fprintf ( stdout, "%s", output1);*/
  fprintf ( stderr, "%s", output1);
  n_explore=0;
  return used;
}
static int ***pw_sim;
int find_seq_chain (Alignment *A, int **sim,int *used,int seq0,int seq1, int seq2,int chain_length, int limit, int max_chain, int *nseq)
{
  int a,b, seq, seq_sim;
  
  n_explore++;
  if ( n_explore>=max_explore)
    {
      return 0;
    }
  if (!pw_sim)
    {
      pw_sim=declare_arrayN(3, sizeof (int), A->nseq, A->nseq, 3);
      for ( a=0; a< A->nseq; a++)
      {
        for ( b=0; b<A->nseq; b++)
          {
            pw_sim[a][b][0]=b;
            pw_sim[a][b][1]=sim[a][b];
            pw_sim[a][b][2]=sim[b][seq2];
          }
        sort_int_inv ( pw_sim[a],3, 1, 0, A->nseq-1);
      }
    }
  
  if ( chain_length>max_chain)return 0;
  else if ( sim[seq1][seq2]>=limit)
    {
      used[seq2]=chain_length+1;
      nseq[0]++;
      return 1;
    }
  else
    {
      int delta_seq2;
      for ( a=0; a< A->nseq; a++)
      {
        seq=pw_sim[seq1][a][0];
        seq_sim=pw_sim[seq1][a][1];
        delta_seq2=pw_sim[seq1][a][2]-sim[seq1][seq2];
        
        

        if ( used[seq])continue;
        else if ( seq_sim<limit)continue;
        else
          {
            used[seq]=chain_length+1;
            nseq[0]++;
            if ( find_seq_chain( A, sim,used,seq0,seq, seq2, chain_length+1,limit, max_chain, nseq))
            {return 1;}
            else
            used[seq]=0;
          }
      }
      return 0;
    }
  return 0;
}


//
//
//
//
/*********************************************************************/
/*                                                                   */
/*                                   New Tree Parsing                */
/*                                                                   */
/*********************************************************************/
NT_node new_insert_node (NT_node T);
int scan_name_and_dist ( FILE *fp, char *name, float *dist);
NT_node new_declare_tree_node ();
int tree_contains_duplicates (NT_node T);
int display_tree_duplicates (NT_node T);
static int unrooted;
NT_node check_tree (NT_node T);
NT_node main_read_tree (char *treefile)
{
  FILE *fp;
  Sequence *S;
  
  NT_node T;
  char c;
  char *tmp;
  int a;
  
  fp=vfopen (remove_charset_from_file (treefile, " \t\n\r"), "r");
  T=new_get_node (NULL,fp);
  vfclose (fp);
  
  S=tree2seq(T, NULL);
  
  T=recode_tree(T, S);
    
  return T;
}

//This function codes the tree into lseq and lseq2
//lseq:  list of the N->nseq child sequences of the node
//lsseq2:Array of size Nseq, with lseq[a]=1 if sequence a is child of node N 
NT_node recode_tree (NT_node T, Sequence *S)
{

  
  if (!T) return T;
  
  vfree (T->lseq); T->lseq=vcalloc (S->nseq, sizeof (int));
  vfree (T->lseq2); T->lseq2=vcalloc (S->nseq, sizeof (int));
  vfree (T->idist); T->idist=vcalloc (S->nseq, sizeof (int));
  vfree (T->ldist); T->ldist=vcalloc (S->nseq, sizeof (int));
  
  T->nseq=0;
  
  if ( T->isseq)
    {
      int i;
      i=name_is_in_list (T->name, S->name, S->nseq, -1);
      if (i!=-1)
      {
        T->lseq[T->nseq++]=i;
        T->lseq2[i]=1;
        T->idist[i]=1;
        T->ldist[i]=(int)(T->dist*10000);;
      }
      else
      {
        printf_exit ( EXIT_FAILURE, stderr, "\nERROR: Sequence %s is in the Tree but Not in the  Sequence dataset [code_lseq][FATAL:%s]", T->name, PROGRAM);
      }
    }
  else
    {
      NT_node R,L;
      int a;
      
      R=recode_tree (T->left, S);
      L=recode_tree (T->right, S);
      
      if (R)for (a=0; a<R->nseq; a++)
      {
        T->lseq2[R->lseq[a]]=1;
      }
        
      if (L)for (a=0; a<L->nseq; a++)
      {
        T->lseq2[L->lseq[a]]=1;
      }
      for (a=0; a<S->nseq; a++)
      {
        if (T->lseq2[a])T->lseq[T->nseq++]=a;
        if (T->lseq2[a])T->idist[a]=R->idist[a]+L->idist[a]+1;
        if (T->lseq2[a])T->ldist[a]=R->ldist[a]+L->ldist[a]+(int)(T->dist*10000);;
        
      }
    }
  return T;
}

NT_node display_leaf_nb (NT_node T, int n, FILE *fp, char * name)
{
  int a, debug=0, t=0;
  if (!T) return T;
  
  
  display_leaf_nb (T->right, n, fp, name);
  display_leaf_nb (T->left, n, fp, name);
  
 
  if (!T->isseq);
  else
    {
      NT_node P;
 
      P=T->parent;
      fprintf (fp, "%s ", T->name);
      for (a=0; a< n; a++)fprintf (fp, "%d", P->lseq2[a]);
      fprintf ( fp," %s\n", name);
    }
  return T;
}
static int root4dc;
NT_node display_code (NT_node T, int n, FILE *fp)
{
  int a, debug=0, t=0;
  if (!T) return T;
  
  if (!T->parent)
    root4dc=0;

  
  
  if (!T->parent && debug) fprintf ( fp, "\nDISPLAY TREE: START");
  display_code (T->right, n, fp);
  display_code (T->left, n, fp);

  fprintf ( fp, "\n");
  if (!T->parent) return T;
  else if ( !(T->parent)->parent && root4dc==1)return T;
  else if ( !(T->parent)->parent && root4dc==0)root4dc=1;
  
  for (a=0; a< n; a++)
    t+=T->lseq2[a];
  if ( t<=n/2)
    for (a=0; a< n; a++)fprintf (fp, "%d", T->lseq2[a]);
  else
    for (a=0; a< n; a++)fprintf (fp, "%d", 1-T->lseq2[a]);
  if (T->isseq && debug)fprintf (fp, "%s", T->name);
  
  if (!T->parent && debug) fprintf (fp, "\nDISPLAY TREE: FINISHED");
  return T;
}
NT_node display_dist (NT_node T, int n, FILE *fp)
{
  int a, debug=0, t=0;
  if (!T) return T;
  
  if (!T->parent)
    root4dc=0;
  
  display_dist (T->right, n, fp);
  display_dist (T->left, n, fp);
  
  fprintf ( stdout, "\n");
  for ( a=0; a< n; a++)
    fprintf ( stdout, " %2d ", T->idist[a]);
  fprintf ( stdout, "\n");
  
  return T;
}
  
NT_node check_tree (NT_node T)
{
  if (T) HERE("CHECK %s", T->name);
  if (!T)
    {
      HERE ("ERROR: Empty Group");
    }
  
  else if (T->isseq)return T;
  else 
    {
      HERE ("R");
      check_tree (T->right);
      HERE ("L");
      check_tree (T->left);
      return NULL;
    }
}

NT_node new_reroot_tree( NT_node T)
{
  T=unroot_tree (T);
  return T;
}
NT_node new_get_node (NT_node T, FILE *fp)
{
  NT_node NN;
  int c;


  c=fgetc (fp);
  if (!T)T=declare_tree_node (100);
  
  
  if ( c==';')
    {
      if (!T->right)T=T->left;
      else if (!T->left)T=T->right;
      vfree (T->parent);T->parent=NULL;
      return T;
    }
  else if ( c==')')
    {
      scan_name_and_dist (fp, T->name, &T->dist);
      return new_get_node (T->parent, fp);
    }
  else if ( c==',')
    {
      return new_get_node (T, fp);
    }
  else
    {
      NN=new_insert_node (T);
      
      if ( c=='(')
      {
        return new_get_node (NN, fp);
      }
      else
      {
        ungetc (c, fp);
        scan_name_and_dist (fp, NN->name, &NN->dist);
        NN->leaf=1;
        NN->isseq=1;
        return new_get_node (T, fp);
      }
    }
}
int scan_name_and_dist ( FILE *fp, char *name, float *dist)
{
  int a, c;
  char number [1000];
  
  a=0;
  c=fgetc (fp);ungetc (c, fp);
  
  
  if ( c==';')return 0;
    
  while ((c=fgetc(fp))!=':' && c!=EOF)
    {
      name[a++]=c;
    }
  name [a]='\0';

  if ( c!=':')
    {
      ungetc (c, fp);
      return 1;
    }
  a=0;
  while (isdigit((c=fgetc(fp))) || c=='.' || c=='-')
    {
      number[a++]=c;
    }

  ungetc (c, fp);
  number[a]='\0';
  
  dist[0]=atof (number);
  return 2;
}
NT_node new_insert_node (NT_node T)
{
  NT_node NN;


  NN=new_declare_tree_node (100);
  NN->parent=T;
  if (!T)
    {
      return NN;
    }
  else if (T->left==NULL)
    {
      T->left=NN;

    }
  else if ( T->right==NULL)
    {
      T->right=NN;
    }

  else
    {
      NT_node P;
      
      NN->right=T;
      P=NN->parent=T->parent;
      T->parent=NN;
      
      if (P && P->right==T)P->right=NN;
      else if ( P && P->left==T)P->left=NN;
      
      NN->left=new_declare_tree_node (100);
      (NN->left)->parent=NN;
      return NN->left;
    }

  return NN;
}

NT_node new_declare_tree_node ()
{
      NT_node p;
      static node_index;
      p= (NT_node)vcalloc (1, sizeof ( Treenode)); 
      p->left = NULL;
      p->right = NULL;
      p->parent = NULL;
      p->dist = 0.0;
      p->leaf = 0;
      p->order = 0;
      p->index=++node_index;
      p->maxnseq=1000;
      p->name=(char*)vcalloc (MAXNAMES+1,sizeof (char));
      p->name[0]='\0';
      return p;
      
      }
int new_display_tree (NT_node T, int n)
{
  int in;

  in=n;
  
  
  if ( T->parent)fprintf (stdout, "\nNode %d: has parents)", in);
  else fprintf (stdout, "\nNode %d: NO parents)", in);

  if ( T->right)
    {
      fprintf (stdout, "\nNode %d has Right Child", in);
      n=new_display_tree (T->right, n+1);
    }
  else fprintf ( stdout, "\nNode %d No Right\n", in);
  
  if ( T->left)
    {
      fprintf (stdout, "\nNode %d has Left Child", in);
      n=new_display_tree (T->left, n+1);
    }
  else fprintf ( stdout, "\nNode %d No Left\n", in);
  
  if ( T->bot)
    {
      fprintf (stdout, "\nNode %d has Bot Child", in);
      n=new_display_tree (T->bot, n+1);
    }
  else fprintf ( stdout, "\nNode %d No Bot\n", in);
      
  
  if (T->isseq)
    {
      fprintf (stdout, "\nNode %d is %s", in, T->name);
      return in;
    }
}
int display_tree_duplicates (NT_node T)
{
  static Sequence *S;
  static int *dup;
  int a, b;
  
  free_sequence (S, -1);
  vfree (dup);
  
  S=tree2seq (T, NULL);
  dup=vcalloc ( S->nseq, sizeof (int));
  
  for (a=0; a< S->nseq-1; a++)
    for ( b=a+1; b<S->nseq; b++)
      {
      if ( strm (S->name[a], S->name[b]))
        {
          dup[a]++;
        }
      }
  for (a=0; a< S->nseq-1; a++)
    for ( b=a+1; b<S->nseq; b++)
      {
      if ( strm (S->name[a], S->name[b]) && dup[a])
        {
          fprintf ( stderr, "\nSequence %s is duplicated %d Times in the tree", S->name[a], dup[a]);
          dup[a]=0;
        }
      }
  return 0;
}
int tree_contains_duplicates (NT_node T)
{
  static Sequence *S;
  int a, b;
  
  free_sequence (S, -1);

  S=tree2seq (T, NULL);
  for (a=0; a< S->nseq-1; a++)
    for ( b=a+1; b<S->nseq; b++)
      {
      if ( strm (S->name[a], S->name[b]))return 1;
      }
  return 0;
}
 
float display_avg_bootstrap ( NT_node T)
{
  float tot;
  int n;
  
  tot=tree2tot_dist (T, BOOTSTRAP);
  n=tree2n_branches (T, BOOTSTRAP);
  fprintf ( stdout, "\nAVERAGE BOOTSRAP: %.3f on %d Branches\n", (n>0)?tot/n:0, n);
  return (n>0)?tot/n:0;
}
          

int tree2n_branches(NT_node T, int mode)
{
  int n=0;

  if (!T) return 0;
  if (!T->parent);
  else if  ((T->isseq && mode !=BOOTSTRAP) || !T->isseq)
    {
      n++;
    }
  n+=tree2n_branches(T->right, mode);
  n+=tree2n_branches(T->left, mode);

  return n;
}
  
float tree2tot_dist ( NT_node T, int mode)
{
  float t=0;
  
  
  if ( !T)return 0;
  
  if ( !T->parent);
  else if  ((T->isseq && mode !=BOOTSTRAP) || !T->isseq) 
    {
      t+=T->dist;
    }
  
  t+=tree2tot_dist(T->right, mode);
  t+=tree2tot_dist(T->left, mode);
  return t;
}

//This function displays all the sequences within the tree sorted by node label
int cmp_tree_array ( const void *vp, const void *vq);
int node_sort ( char *name, NT_node T)
{
  NT_node N;
  int nseq;
  int **array, a;
  Sequence *S;
  while (T->parent)T=T->parent;
  
  nseq=tree2nseq (T);
  array=declare_int (nseq, 2);
  N=tree2node (name, T);
  
  if (N==NULL)printf_exit (EXIT_FAILURE, stderr, "ERROR: %s is not in the tree [FATAL:%s]\n", name, PROGRAM);
  array=display_tree_from_node (N,0,0, array);
  qsort ( array, nseq, sizeof (int**), cmp_tree_array);
  S=tree2seq(T, NULL);
  for (a=0; a<nseq; a++)
    fprintf ( stdout, ">%s %d %d\n", S->name[array[a][0]], array[a][1], array[a][2]);
  exit (EXIT_SUCCESS);
}

NT_node tree2node (char *name, NT_node T)
{
  NT_node T1, T2;
  if ( !T) return T;
  else if (T->leaf && strm (T->name, name)) return T;
  else 
    {

      T1=tree2node ( name, T->right);
      T2=tree2node ( name, T->left);
      return (T1>T2)?T1:T2;
  }
  
}

int ** display_tree_from_node (NT_node T, int up, int down, int **array)
{
  
  if (!T || T->visited)return array;
  
  T->visited=1;
  if (T->isseq)
    {
      array[T->lseq[0]][0]=T->lseq[0];
      array[T->lseq[0]][1]=up;
      array[T->lseq[0]][2]=down;
      
    }
  else
    {
      array=display_tree_from_node ( T->left ,up, down+1, array);
      array=display_tree_from_node ( T->right,up, down+1, array);
    }
  array=display_tree_from_node ( T->parent,up+1, 0, array);
  T->visited=0;
  return array;

}

int cmp_tree_array ( const void *p, const void *q)
{
  const int **vp=p;
  const int **vq=q;
  if (vp[0][1]>vq[0][1])return 1;
  else if ( vp[0][1]<vq[0][1]) return -1;
  else if ( vp[0][2]>vq[0][2]) return 1;
  else if ( vp[0][2]<vq[0][2]) return -1;
  else return 0;
}

NT_node * read_tree_list (Sequence *S)
{
  NT_node *T;
  int a;
  
  T=vcalloc ( S->nseq+1, sizeof (NT_node));

  for ( a=0; a<S->nseq; a++)
    {
      T[a]=main_read_tree (S->name[a]);
      T[a]->file=vcalloc (strlen (S->name[a]), sizeof (char));
      sprintf (T[a]->file, "%s", S->name[a]);
    }
  return T;
}
int treelist2dmat ( Sequence *S)
{
  NT_node *T;
  int n=0, a, b, tot;
  float v;
  Sequence *TS;
  int *used;
  int ntop;
  
  n=S->nseq;
  T=read_tree_list (S);
  TS=tree2seq(T[0], NULL);
  fprintf (stdout, "\n%d", S->nseq);
  for (a=0; a<n; a++)
    {
      fprintf ( stdout,"\n%-10s ", S->name[a]); 
      for ( b=0; b<n; b++)
      {
        v=100-simple_tree_cmp (T[a], T[b], TS, 1);
        fprintf ( stdout, "%.4f ", v);
      }
      
    }
  
  exit (EXIT_SUCCESS);
  return 0;
}
int treelist2leafgroup ( Sequence *S, Sequence *TS)
{
  NT_node *T;
  int n=0,nseq, a, b,c,s, tot;
  int v;
  int *used;
  int ntop;
  char *split_file, *sorted_split_file;
  char *buf=NULL;
  FILE *fp;
  char *name, *fname, *group, *ref_group, *list;
  
  T=read_tree_list (S);
  if (!TS)TS=tree2seq(T[0], NULL);
  
  name=vcalloc (1000, sizeof (char));
  fname=vcalloc (1000, sizeof (char));
  group=vcalloc (TS->nseq*10, sizeof (char));
  ref_group=vcalloc (TS->nseq*10, sizeof (char));
  list=vcalloc (100*S->nseq, sizeof (char));
  split_file=vtmpnam (NULL);
  sorted_split_file =vtmpnam (NULL);
  
  n=S->nseq;
  used=vcalloc (n, sizeof (int));
 
  T=read_tree_list (S);
  if (!TS)TS=tree2seq(T[0], NULL);
  nseq=TS->nseq;
  fp=vfopen (split_file, "w");
  
  for ( a=0; a< S->nseq; a++)
    {
      
      T[a]=prune_tree  (T[a], TS);
      T[a]=recode_tree (T[a], TS);
      display_leaf_nb (T[a], TS->nseq,fp, S->name[a]);
    }
  vfclose (fp);
  
  for (s=0; s< TS->nseq; s++)
    {
      int i,tot=0;
      

      printf_system ( "cat %s | grep %s| sort > %s", split_file,TS->name[s], sorted_split_file);
     
      vfopen (sorted_split_file, "r");
      ref_group[0]=group[0]='\0';
      
      while ( (c=fgetc (fp))!=EOF)
      {
        
        ungetc (c, fp);
        buf=vfgets (buf, fp);
        sscanf (buf, "%s %s %s\n", name, group, fname);

        if ( !ref_group[0]|| !strm (group, ref_group))
          {
            if (ref_group[0])
            
            {fprintf (stdout, "%s %6.2f %s",name, (((float)n*100)/(float)S->nseq), ref_group);
              for (i=0,a=0; a<nseq; a++)
                if (ref_group[a]=='1' && a!=s)fprintf (stdout, " %s ", TS->name[a]);
              fprintf ( stdout, "\n");
              fprintf (stdout, "\nLIST: %s\n", list);
            }
            list[0]='\0';
            sprintf ( ref_group, "%s", group);
            list=strcatf (list, " %s", fname);
            n=1;
          }
        else
          {

            list=strcatf (list, " %s", fname);
            n++;
          }
      }

      fprintf (stdout, "%s %6.2f %s",name, (((float)n*100)/(float)S->nseq), group);
      for (i=0,a=0; a<nseq; a++)
      if (group[a]=='1' && a!=s)fprintf (stdout, " %s ", TS->name[a]);
      fprintf (stdout, "\nLIST %s\n", list);
      fprintf ( stdout, "\n");
      vfclose (fp);
    }
  
  exit (0);
}

int treelist2splits ( Sequence *S, Sequence *TS)
{
  NT_node *T;
  int n=0,nseq, a, b,c, tot;
  int v;
  int *used;
  int ntop;
  char *split_file, *sorted_split_file;
  char *buf=NULL, *ref_buf=NULL;
  FILE *fp;
  
  split_file=vtmpnam (NULL);
  sorted_split_file =vtmpnam (NULL);
  
  n=S->nseq;
  used=vcalloc (n, sizeof (int));
 
  T=read_tree_list (S);
  if (!TS)TS=tree2seq(T[0], NULL);
  nseq=TS->nseq;
  fp=vfopen (split_file, "w");
  
  for ( a=0; a< S->nseq; a++)
    {
      
      T[a]=prune_tree  (T[a], TS);
      T[a]=recode_tree (T[a], TS);
      display_leaf_nb (T[a], TS->nseq,fp, S->name[a]);
    }
  vfclose (fp);
  
  printf_system ( "cat %s | grep 1| sort > %s", split_file, sorted_split_file);
  
  vfopen (sorted_split_file, "r");

  while ( (c=fgetc (fp))!=EOF)
    {
      
      ungetc (c, fp);
      buf=vfgets (buf, fp);
      buf [strlen(buf)-1]='\0';
      
      if ( ref_buf==NULL)
      {
        ref_buf=vcalloc (strlen (buf)+1, sizeof (char));
        sprintf ( ref_buf, "%s", buf);
        n=1;
      }
      else if ( !strm (buf, ref_buf))
      {
        int i;
        fprintf ( stdout, "%3d %s(", n, ref_buf);
        for (i=0,a=0; a<nseq; a++)
          if (ref_buf[a]=='1')
            {
            if (i==1)fprintf(stdout, ",");
            fprintf (stdout, "%s", TS->name[a]);
            i=1;
            }
        fprintf ( stdout, "),(");
        for (i=0,a=0; a<nseq; a++)
          if (ref_buf[a]=='0')
            {
            if (i==1) fprintf ( stdout, ",");
            fprintf (stdout, "%s", TS->name[a]);
            i=1;
            }
              
        fprintf (stdout, ")\n");
        sprintf ( ref_buf, "%s", buf);
        n=1;
      }
      else
      {
        n++;
      }
    }
  vfclose (fp);
      
  
  exit (0);
}

      
int treelist2lti ( Sequence *S, Sequence *TS, char *ngb_size)
{
  NT_node *T;
  int a,b, c, d,e, f, ****dist;
  int ngb;
  float score0=0, score1=0;
  
  T=read_tree_list (S);
  if (!TS)TS=tree2seq(T[0], NULL);
  dist=vcalloc ( S->nseq, sizeof (int****));
  
  
  for ( a=0; a< S->nseq; a++)
    {
      
      T[a]=prune_tree  (T[a], TS);
      T[a]=recode_tree (T[a], TS);
      dist[a]=tree2dist(T[a], TS, NULL);
    }
  
  if (!ngb_size)ngb=TS->nseq*2;
  else ngb=atoi (ngb_size);
  for (a=0; a<TS->nseq; a++)
    {

      float delta, d1, d2;

      score0=score1=0;

      for (b=0; b<S->nseq; b++)
      {
        int min;
        for (min=10000,d=0; d<TS->nseq; d++)
          {
            d1=dist[b][0][a][d];
            if (d1!=0)min=MIN(min,d1);
          }
        
              
        for (c=0; c<S->nseq; c++)
          {
            for (d=0; d<TS->nseq;d++)
            {
              
              d1=dist[b][0][a][d];
              d2=dist[c][0][a][d];
            
              if ( d1<=ngb || d2<=ngb)
                {
                  delta=d1-d2;
                }
              else delta=0;

              score0+=(delta<0)?-delta:delta;
            }
            
          }
      }
      score0=score0/((float)(S->nseq*S->nseq)*TS->nseq);
      fprintf ( stdout, "\n%20s %7.3f ", TS->name[a], score0);
    }
  
  return 0;
}


int ***tree2dist (NT_node T, Sequence *S, int ***d)
{
  int *l0, *r0,*l1, *r1, a, b;
  
  
  if (!T) return d;
  if (!S)S=tree2seq(T, NULL);
  if (!d)
    {
      d=declare_arrayN (3, sizeof (float),2, S->nseq, S->nseq);
      T=prune_tree(T, S);
      T=recode_tree (T, S);
    }
  
  if (!T->left)return d;
  if (!T->right) return d;
  
  l0=(T->left)->idist;
  r0=(T->right)->idist;

  l1=(T->left)->ldist;
  r1=(T->right)->ldist;
  
  for (a=0; a< S->nseq; a++)
    for (b=0; b<S->nseq; b++)
      {
      if (l0[a]>0 && r0[b]>0)d[0][a][b]=d[0][b][a]=l0[a]+r0[b];
      if (l0[a]>0 && r0[b]>0)d[1][a][b]=d[1][b][a]=l1[a]+r1[b];
      }
  
  d=tree2dist (T->left, S, d);
  d=tree2dist (T->right, S, d);
  
 
  return d;
}



int **tree2dist_split ( NT_node T, Sequence *S, int **dist)
{
  
  FILE *fp;
  int a, b, c, d, n=0;
  char *buf=NULL, **list=NULL, *split_file;


  if (!S)S=tree2seq(T, NULL);
  
  T=prune_tree  (T, S);
  T=recode_tree (T, S);
  
  split_file=vtmpnam (NULL);
  fp=vfopen (split_file, "w");
  display_code (T, S->nseq,fp);
  vfclose (fp);
 
  list=declare_char (2*S->nseq, S->nseq+1);
  fp=vfopen (split_file, "r");

  while ((buf=vfgets (buf,fp))!=NULL)
    {
      if (buf[0]=='1' || buf[0]=='0')sprintf (list[n++], "%s", buf);
    }
  vfclose (fp);
  dist=declare_int ( S->nseq, S->nseq);
  for (a=0; a< S->nseq; a++)
    for ( b=0; b<S->nseq; b++)
      for (c=0; c<n; c++)
      if (list[c][a]!=list[c][b])dist[a][b]++;
  
 
  return dist;
}

int treelist2groups (Sequence *S, Sequence *TS)
{
  NT_node *T;
  int n=0, a, b, tot;
  int v;
  int *used;
  int ntop;
  
  n=S->nseq;
  used=vcalloc (n, sizeof (int));
 
  T=read_tree_list (S);
  if (TS!=NULL)
    {
      for (a=0; a< S->nseq; a++)
      {
        T[a]=prune_tree (T[a], TS);
        T[a]=recode_tree (T[a], TS);
      }
    }
  else
    {
      TS=tree2seq(T[0], NULL);
    }

  for (ntop=0,a=0; a<n; a++)
    {
      if (used[a]==0)
      {
        ntop++;
        fprintf ( stdout, "\nTree %s:",S->name[a]);
        used[a]=1;
      }
      else continue;
      tot=1;
      for ( b=0; b<n; b++)
      {
        v=0;
        
        v=(int)simple_tree_cmp (T[a], T[b], TS, 1);
        if ( v==100)
          {
            used[b]=1;
            used[a]++;
            fprintf (stdout," %s ", S->name[b]);
            tot++;
          }
      }
      
      fprintf ( stdout, "__ N=%d\n", tot-1);
    }
  for (a=0; a< n; a++)
    {
      if ( used[a]>1)
      fprintf ( stdout, "\n>%-15s %4d TOPOLOGY_LIST", S->name[a], used[a]-1);
    }
  
  fprintf ( stdout, "\nTotal Number of different topologies: %d", ntop);
  exit (EXIT_SUCCESS);
}
float simple_tree_cmp (NT_node T1, NT_node T2,Sequence *S, int mode)
{
  Tree_sim *TS1, *TS2;
  float t, w, l, n;
  
  TS1=vcalloc (1, sizeof (Tree_sim));
  TS2=vcalloc (1, sizeof (Tree_sim));
  
  
  T1=recode_tree(T1, S);
  T2=recode_tree(T2, S);
  
  n=new_compare_trees ( T1, T2, S->nseq, TS1);
  new_compare_trees ( T2, T1, S->nseq, TS2);
  


  t=(TS1->uw+TS2->uw)*100/(TS1->max_uw+TS2->max_uw);
  w=(TS1->w+TS2->w)*100/(TS1->max_w+TS2->max_w);
  l=(TS1->d+TS2->d)*100/(TS1->max_d+TS2->max_d);
  
  vfree (TS1); vfree (TS2);
  if ( mode ==1)return t;
  else if (mode ==2) return w;
  else  return l;
}


/*********************************COPYRIGHT NOTICE**********************************/
/* Centre National de la Recherche Scientifique (CNRS) */
/*and */
/*Cedric Notredame */
/*Fri Oct 26 17:03:04     2007. */
/*All rights reserved.*/
/*This file is part of T-COFFEE.*/
/**/
/*    T-COFFEE is free software; you can redistribute it and/or modify*/
/*    it under the terms of the GNU General Public License as published by*/
/*    the Free Software Foundation; either version 2 of the License, or*/
/*    (at your option) any later version.*/
/**/
/*    T-COFFEE is distributed in the hope that it will be useful,*/
/*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
/*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
/*    GNU General Public License for more details.*/
/**/
/*    You should have received a copy of the GNU General Public License*/
/*    along with Foobar; if not, write to the Free Software*/
/*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
/*...............................................                                                                                      |*/
/*  If you need some more information*/
/*  cedric.notredame@europe.com*/
/*...............................................                                                                                                                                     |*/
/**/
/**/
/*    */
/*********************************COPYRIGHT NOTICE**********************************/

Generated by  Doxygen 1.6.0   Back to index