Logo Search packages:      
Sourcecode: t-coffee version File versions  Download package

util_aln_analyze.c

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h> 
#include "io_lib_header.h"
#include "util_lib_header.h"
#include "dp_lib_header.h"
#include "define_header.h"
/************************************************************************************/
/*                NEW      ANALYZE 2    : SAR                                        */
/************************************************************************************/
/* Forward declarations for the SAR analysis helpers of this section.            */
/* Not every function declared here has a definition visible in this part of the */
/* file (e.g. display_prediction_old and the sar_vs_seq* scorers).               */
float display_prediction_old (int **prediction, int n, Alignment *A, Alignment *S, int field);

/* Prints sp/sn/sn2 figures computed from a [row][pred][real] table of counts. */
float display_prediction (int ***count, Alignment *S, int c, int n);
/* Column filters: each one receives the alignment A, the SAR alignment S, a     */
/* compound index c, a sequence index to leave out and the mode string, and      */
/* returns a newly allocated Alignment.                                          */
Alignment * filter_aln4sar0 ( Alignment *A, Alignment *S, int c, int leave, char *mode);
Alignment * filter_aln4sar1 ( Alignment *A, Alignment *S, int c, int leave, char *mode);
Alignment * filter_aln4sar2 ( Alignment *A, Alignment *S, int c, int leave, char *mode);
Alignment * filter_aln4sar3 ( Alignment *A, Alignment *S, int c, int leave, char *mode);
Alignment * filter_aln4sar4 ( Alignment *A, Alignment *S, int c, int leave, char *mode);
Alignment * filter_aln4sar5 ( Alignment *A, Alignment *S, int c, int leave, char *mode);

/* Profile builders return a len_aln x 26 table of (non-positive) penalties;     */
/* sar_profile2score sums the profile entries matching the residues of seq.      */
int **sar2profile ( Alignment *A, Alignment *S, int c, int leave);
int **sar2profile_sim ( Alignment *A, Alignment *S, int **sim, int comp, int leave);
int sar_profile2score ( char *seq, int **profile);
/* Column-versus-SAR scoring routines; sar_vs_seq3 and sar_vs_seq4 are called by */
/* filter_aln4sar3 and filter_aln4sar5 respectively.                             */
double sar_vs_seq1 ( char *sar, char *seq, float gl, int **sim, char *best_aa);
double sar_vs_seq2 ( char *sar, char *seq, float ng, int **mat, char *a);
double sar_vs_seq3 ( char *sar, char *seq, float ng, int **mat, char *a);
double sar_vs_seq4 ( char *sar, char *seq, float ng, int **mat, char *a);
/* Returns 1 when sequence seq looks more like the 'I' class than the other one. */
int make_sim_pred ( Alignment *A,Alignment *S, int comp, int seq);

int **sar2profile_sim ( Alignment *A, Alignment *S, int **sim, int comp, int leave)
{
  /* Builds a len_aln x 26 penalty profile for compound `comp`.
   *
   * For every pair (active 'I' sequence, non-'I' sequence), neither of them
   * being `leave`, whose similarity sim[][] is at least 50, each residue of the
   * non-'I' sequence that differs from the active one and is never observed in
   * an 'I' sequence at that column is penalised by the pair similarity.
   * The caller owns the returned table.
   */
  int col, active, inactive;
  int **profile;
  int ***counts;   /* counts[class][column][residue], class 1 == 'I' */

  profile=declare_int (A->len_aln, 26);
  counts=declare_arrayN (3,sizeof (int),2,A->len_aln, 26);

  /* Residue usage per column and per class, ignoring gaps and `leave`. */
  for (col=0; col<A->len_aln; col++)
    {
      int seq;
      for (seq=0; seq<A->nseq; seq++)
	{
	  int res, cls;

	  if (seq==leave) continue;
	  res=tolower (A->seq_al[seq][col]);
	  if (is_gap(res)) continue;
	  cls=(S->seq_al[comp][seq]=='I')?1:0;
	  counts[cls][col][res-'a']++;
	}
    }

  for (active=0; active<A->nseq; active++)
    {
      if (active==leave) continue;
      if (S->seq_al[comp][active]!='I') continue;

      for (inactive=0; inactive<A->nseq; inactive++)
	{
	  int weight;

	  if (inactive==leave || inactive==active) continue;
	  if (S->seq_al[comp][inactive]=='I') continue;

	  weight=sim[active][inactive];
	  for (col=0; col<A->len_aln; col++)
	    {
	      int res_act, res_inact;

	      res_act=tolower (A->seq_al[active][col]);
	      res_inact=tolower (A->seq_al[inactive][col]);
	      if (is_gap(res_act) || is_gap(res_inact)) continue;
	      if (res_act==res_inact) continue;

	      res_inact-='a';
	      /* only residues unseen among the 'I' sequences are penalised */
	      if (counts[1][col][res_inact]==0 && weight>=50)
		profile[col][res_inact]-=weight;
	    }
	}
    }

  free_arrayN((void***)counts,3);
  return profile;
}
int **sar2profile ( Alignment *A, Alignment *S, int comp, int leave)
{
  /* Builds a len_aln x 26 penalty profile for compound `comp`.
   *
   * Columns with more than 5% gaps, or with 15 or more distinct residue types,
   * are left at zero. In the remaining columns every residue seen only in
   * non-'I' sequences gets minus its number of occurrences.
   * Sequence `leave` is ignored while counting residues.
   * The caller owns the returned table.
   */
  int col, seq, res;
  int n_active, n_hit_cols=0;   /* bookkeeping only, never read back */
  int max_categories;
  float gap_fraction, gap_limit;
  int **profile;
  int ***counts;                /* counts[class][column][residue], class 1 == 'I' */

  profile=declare_int (A->len_aln, 26);
  counts=declare_arrayN (3,sizeof (int),2,A->len_aln, 26);

  n_active=0;
  for (seq=0; seq<A->nseq; seq++)
    {
      if (seq!=leave && S->seq_al[comp][seq]=='I') n_active++;
    }

  for (col=0; col<A->len_aln; col++)
    {
      for (seq=0; seq<A->nseq; seq++)
	{
	  int cls;

	  res=tolower (A->seq_al[seq][col]);
	  cls=(S->seq_al[comp][seq]=='I')?1:0;
	  if (seq==leave || is_gap(res)) continue;
	  counts[cls][col][res-'a']++;
	}
    }

  max_categories=15;
  gap_limit=0.05;
  for (col=0; col<A->len_aln; col++)
    {
      int n_categories=0, n_specific=0;

      /* gap fraction is measured on all sequences, `leave` included */
      gap_fraction=0;
      for (seq=0; seq<A->nseq; seq++)
	gap_fraction+=(is_gap(A->seq_al[seq][col]));
      gap_fraction/=(float)A->nseq;
      if (gap_fraction>gap_limit) continue;

      for (res=0; res<26; res++)
	{
	  if (counts[0][col][res] || counts[1][col][res]) n_categories++;
	}
      if (n_categories>=max_categories) continue;

      for (res=0; res<26; res++)
	{
	  if (counts[0][col][res] && !counts[1][col][res])
	    {
	      profile[col][res]=-counts[0][col][res];
	      n_specific++;
	    }
	}
      if (n_specific) n_hit_cols++;
    }

  free_arrayN((void***)counts,3);
  return profile;
}
Alignment * filter_aln4sar0 ( Alignment *A, Alignment *S, int comp, int leave, char *mode)
{
  /* Identity filter: hands back a fresh copy of A; S, comp, leave and mode are unused. */
  Alignment *unfiltered;

  unfiltered=copy_aln (A,NULL);
  return unfiltered;
}
Alignment * filter_aln4sar1 ( Alignment *inA, Alignment *S, int comp, int leave, char *mode)
{
  /* Keep the columns richest in residues that are ONLY associated with
   * non-'I' sequences of compound `comp`.
   *
   * Each column is scored by the number of sequences carrying a residue that
   * occurs in the non-'I' class but never in the 'I' class (sequence `leave`
   * is not counted when building the residue tables). The columns are sorted
   * on that score and all but the last len_aln/(100/T1) of them are blanked
   * and removed. T1 is read from `mode` (_T1_, default 5).
   *
   * Returns a newly allocated alignment; inA is left untouched.
   *
   * Changes with respect to the previous version:
   *  - T1 is clamped to [1,100]: 100/T1 used to be a division by zero for
   *    T1==0 and for T1>100.
   *  - the per-sequence tally (list1) was never read and could be indexed
   *    past its inA->nseq rows; it has been dropped.
   */
  Alignment *F, *A;
  int a, b, c, i, r, score;
  int ***cache, **list2;
  int Delta;
  int T1;

  list2=declare_int ( inA->len_aln, 2);
  cache=declare_arrayN (3,sizeof (int),inA->len_aln,2, 26);
  F=copy_aln (inA, NULL);

  /* Working copy restricted to the sequences described by the SAR string */
  A=copy_aln (inA, NULL);
  A->nseq=strlen (S->seq_al[comp]);

  strget_param (mode, "_T1_", "5", "%d", &T1);
  if (T1<1) T1=1;
  else if (T1>100) T1=100;

  /* cache[column][class][residue]: residue usage, class 1 == 'I' */
  for ( a=0; a< A->len_aln; a++)
    {
      for (b=0; b< A->nseq; b++)
	{
	  if ( b==leave) continue;
	  i=(S->seq_al[comp][b]=='I')?1:0;
	  r=tolower(A->seq_al[b][a]);
	  if ( r=='-')continue;
	  cache[a][i][r-'a']++;
	}
    }

  /* Column score: sequences showing a residue private to the non-'I' class */
  for (a=0; a< A->len_aln; a++)
    {
      for ( score=0,b=0; b< A->nseq; b++)
	{
	  r=tolower (A->seq_al[b][a]);
	  if ( r=='-')continue;
	  else r-='a';
	  if ( cache[a][0][r] && !cache[a][1][r])score ++;
	}
      list2[a][0]=a;
      list2[a][1]=score;
    }
  sort_int (list2, 2, 1, 0, F->len_aln-1);

  /* Blank every column except the last Delta entries of the sorted list */
  Delta=A->len_aln/(100/T1);
  for ( a=0; a< F->len_aln-Delta; a++)
    {
      b=list2[a][0];
      for ( c=0; c<F->nseq; c++)
	{
	  F->seq_al[c][b]='-';
	}
    }

  ungap_aln (F);
  free_aln (A);
  free_arrayN ( (void ***)cache, 3);
  free_arrayN ((void**)list2, 2);

  return F;
}
Alignment * filter_aln4sar2 ( Alignment *inA, Alignment *S, int comp, int leave, char *mode)
{
  /* Keep low-entropy columns: those showing fewer than max_ncat distinct
   * symbols (the gap symbol counts as one) and less than 10% of '-'.
   * Every other column is blanked and removed.
   *
   * Sequence `leave` is ignored when counting symbols; `mode` is unused.
   * Returns a newly allocated alignment; inA is left untouched.
   *
   * Changes with respect to the previous version:
   *  - the symbol table is an array of int but was allocated with
   *    sizeof (char), which left room for 125 counters only;
   *  - characters go through (unsigned char) before tolower() so that the
   *    table index can never be negative.
   */
  Alignment *F, *A;
  int a,b,r,ncat;
  int *cache;
  int max_ncat=10;

  cache=vcalloc ( 500, sizeof (int));
  F=copy_aln (inA, NULL);

  /* Working copy restricted to the sequences described by the SAR string */
  A=copy_aln (inA, NULL);
  A->nseq=strlen (S->seq_al[comp]);

  for ( a=0; a< A->len_aln; a++)
    {
      /* count the distinct symbols of this column */
      for (ncat=0,b=0; b< A->nseq; b++)
	{
	  if ( b==leave) continue;

	  r=tolower((unsigned char)A->seq_al[b][a]);
	  if ( !cache[r])ncat++;
	  cache[r]++;
	}

      /* too diverse or too gappy: drop the column from the output */
      if ( ncat>=max_ncat || ((cache['-']*100)/A->nseq)>=10)
	{
	  for (b=0; b<F->nseq; b++)
	    {
	      F->seq_al[b][a]='-';
	    }
	}

      /* reset only the counters this column may have touched */
      for (b=0; b<A->nseq; b++)
	{
	  r=tolower((unsigned char)A->seq_al[b][a]);
	  cache[r]=0;
	}
    }

  free_aln (A);
  ungap_aln (F);
  vfree (cache);
  return F;
}

Alignment * filter_aln4sar3 ( Alignment *inA, Alignment *S, int comp, int leave, char *mode)
{
  /* Keep the T3% columns that score best against the SAR pattern of
   * compound `comp` (T3 is read from `mode`, _T3_, default 10).
   *
   * Every column is turned into a string (one character per sequence),
   * position `leave` is spliced out of both the column and the SAR string,
   * and the pair is scored with sar_vs_seq3(). Columns are sorted on that
   * score and all but the last len_aln/(100/T3) of them are blanked and
   * removed. F->score_aln receives the highest column score.
   *
   * Returns a newly allocated alignment; inA is left untouched.
   *
   * Change with respect to the previous version: T3 is clamped to [1,100],
   * 100/T3 used to be a division by zero for T3==0 and for T3>100.
   *
   * NOTE(review): sar_analyze passes leave==-1 when no sequence must be left
   * out; this relies on splice_out_seg() accepting a negative position --
   * to be confirmed against its implementation.
   */
  Alignment *F, *rA, *A;
  int a, b,c;
  int **list1;
  char *bufS, *bufA;
  int Delta;
  int T3;

  /* Working copy restricted to the sequences described by the SAR string */
  A=copy_aln (inA, NULL);
  A->nseq=strlen (S->seq_al[comp]);
  F=copy_aln (inA, NULL);
  rA=rotate_aln (A, NULL);

  strget_param (mode, "_T3_", "10", "%d", &T3);
  if (T3<1) T3=1;
  else if (T3>100) T3=100;

  list1=declare_int ( inA->len_aln, 2);
  bufA=vcalloc ( A->nseq+1, sizeof (char));
  bufS=vcalloc ( A->nseq+1, sizeof (char));

  sprintf ( bufS, "%s", S->seq_al[comp]);
  splice_out_seg(bufS,leave, 1);

  for (a=0; a< A->len_aln; a++)
    {
      char aa;
      list1[a][0]=a;
      sprintf (bufA, "%s", rA->seq_al[a]);
      splice_out_seg (bufA,leave,1);
      list1[a][1]=(int)sar_vs_seq3 ( bufS, bufA,0,NULL, &aa);
    }

  sort_int (list1, 2, 1, 0, F->len_aln-1);

  /* Blank every column except the last Delta entries of the sorted list */
  Delta=F->len_aln/(100/T3);
  for ( a=0; a< F->len_aln-Delta; a++)
    {
      b=list1[a][0];
      for ( c=0; c<F->nseq; c++)
	{
	  F->seq_al[c][b]='-';
	}
    }
  /* must be read before ungap_aln() changes F->len_aln */
  F->score_aln=list1[F->len_aln-1][1];
  ungap_aln (F);

  free_aln (rA);
  free_aln(A);
  free_arrayN ((void**)list1, 2);
  vfree (bufS);vfree (bufA);
  return F;
}
Alignment * filter_aln4sar4 ( Alignment *inA, Alignment *S, int comp, int leave, char *mode)
{
  /* Keeps the columns in which at least one sequence carries a residue that
   * is frequent among the 'I' sequences of compound `comp`; the other
   * columns are blanked and removed. `mode` is unused.
   * Returns a newly allocated alignment; inA is left untouched.
   *
   * NOTE(review): the frequency threshold (n_active/2) is the number of
   * non-gap 'I' residues counted in the LAST column only, because the counter
   * is reset for every column. This mirrors the historical behaviour; whether
   * a per-column value was intended should be confirmed.
   */
  Alignment *out, *work;
  int col, seq, row;
  int n_active=0;
  int ***counts;                  /* counts[column][class][residue] */
  int **seq_list, **col_flags;

  seq_list=declare_int ( inA->nseq, 2);      /* allocated and released, never filled */
  col_flags=declare_int ( inA->len_aln, 2);

  counts=declare_arrayN (3,sizeof (int),inA->len_aln,2, 26);
  out=copy_aln (inA, NULL);
  work=copy_aln (inA, NULL);
  work->nseq=strlen (S->seq_al[comp]);

  for (col=0; col<work->len_aln; col++)
    {
      n_active=0;
      for (seq=0; seq<work->nseq; seq++)
	{
	  int cls, res;

	  if (seq==leave) continue;
	  cls=(S->seq_al[comp][seq]=='I')?1:0;
	  res=tolower (work->seq_al[seq][col]);
	  if (res!='-')
	    {
	      counts[col][cls][res-'a']++;
	      n_active+=cls;
	    }
	}
    }

  for (col=0; col<work->len_aln; col++)
    {
      int keep=0;

      for (seq=0; seq<work->nseq; seq++)
	{
	  int res=tolower (out->seq_al[seq][col]);

	  if (res=='-') continue;
	  res-='a';
	  if (counts[col][1][res]>=n_active/2) keep=1;
	}
      col_flags[col][0]=col;
      col_flags[col][1]=keep;
    }

  for (col=0; col<out->len_aln; col++)
    {
      if (col_flags[col][1]!=1)
	{
	  int target=col_flags[col][0];

	  for (row=0; row<out->nseq; row++)
	    out->seq_al[row][target]='-';
	}
    }

  ungap_aln (out);
  free_aln (work);
  free_arrayN ( (void ***)counts, 3);
  free_arrayN ((void**)seq_list, 2);
  free_arrayN ((void**)col_flags, 2);

  return out;
}

Alignment * filter_aln4sar5 ( Alignment *inA, Alignment *S, int comp, int leave, char *mode)
{
  /* Keep the 10 columns whose variation scores best against the SAR pattern
   * of compound `comp`.
   *
   * Every column is turned into a string (one character per sequence),
   * position `leave` is spliced out of both the column and the SAR string,
   * and the pair is scored with sar_vs_seq4(). Columns are sorted on that
   * score and all but the last 10 are blanked and removed. F->score receives
   * the highest column score and F->score_aln is set to 10. `mode` is unused.
   *
   * Returns a newly allocated alignment; inA is left untouched.
   *
   * Changes with respect to the previous version:
   *  - the function released inA, which belongs to the caller, and leaked its
   *    own working copy A; it now frees A, as filter_aln4sar3 does;
   *  - the rotated alignment is built from A (restricted to the sequences
   *    described by the SAR string) instead of inA, so a column string can no
   *    longer be longer than the A->nseq+1 bytes of bufA;
   *  - the unused local `max` is gone.
   */
  Alignment *F, *rA, *A;
  int a, b,c;
  int **list1;
  char *bufS, *bufA;

  /* Working copy restricted to the sequences described by the SAR string */
  A=copy_aln (inA, NULL);
  A->nseq=strlen (S->seq_al[comp]);

  rA=rotate_aln (A, NULL);
  F=copy_aln (inA, NULL);

  list1=declare_int ( A->len_aln, 2);
  bufA=vcalloc ( A->nseq+1, sizeof (char));
  bufS=vcalloc ( A->nseq+1, sizeof (char));

  sprintf ( bufS, "%s", S->seq_al[comp]);
  splice_out_seg(bufS,leave, 1);

  for (a=0; a< A->len_aln; a++)
    {
      char aa;
      list1[a][0]=a;
      sprintf (bufA, "%s", rA->seq_al[a]);
      splice_out_seg (bufA,leave,1);
      list1[a][1]=(int)sar_vs_seq4 ( bufS, bufA,0,NULL, &aa);
    }

  sort_int (list1, 2, 1, 0, F->len_aln-1);
  F->score=list1[F->len_aln-1][1];

  /* Blank every column except the last 10 entries of the sorted list */
  for ( a=0; a< F->len_aln-10; a++)
    {
      b=list1[a][0];
      for ( c=0; c<F->nseq; c++)
	{
	  F->seq_al[c][b]='-';
	}
    }
  F->score_aln=10;
  ungap_aln (F);

  free_aln (A);
  free_aln (rA);
  free_arrayN ((void**)list1, 2);
  vfree (bufS);vfree (bufA);
  return F;
}

int sar_profile2score ( char *seq, int **P)
{
  /* Sums, over the non-gap positions of seq, the entry of profile P found at
   * [position][residue], the residue being lower-cased and offset by 'a'. */
  int total=0;
  int pos, len;

  len=strlen (seq);
  for (pos=0; pos<len; pos++)
    {
      int res=seq[pos];

      if (!is_gap(res))
	total+=P[pos][tolower(res)-'a'];
    }
  return total;
}
int make_sim_pred ( Alignment *A,Alignment *S, int comp, int seq)
{
  /* Compares sequence `seq` with the two classes of compound `comp`.
   *
   * For each column where `seq` is not '-', the fraction of identical
   * residues among the other non-'-' sequences is computed separately for the
   * 'I' class (index 1) and for the remaining sequences (index 0); these
   * fractions are summed over the columns. Both sums are printed on stdout.
   * Returns 1 when the 'I' sum is at least as large as the other one.
   *
   * The two 2x2 work tables are allocated once and reused across calls.
   */
  static float **col_score;
  static float **total;
  int col, other, cls;

  if ( !col_score)
    {
      col_score=declare_float (2, 2);
      total=declare_float (2, 2);
    }

  for (cls=0; cls<2; cls++)
    {
      col_score[cls][0]=col_score[cls][1]=0;
      total[cls][0]=total[cls][1]=0;
    }

  for (col=0; col<A->len_aln; col++)
    {
      int ref=A->seq_al[seq][col];

      if (ref=='-') continue;

      for (other=0; other<A->nseq; other++)
	{
	  int res;

	  if (other==seq) continue;
	  res=A->seq_al[other][col];
	  if (res=='-') continue;

	  cls=(S->seq_al[comp][other]=='I')?1:0;
	  if (res==ref) col_score[cls][0]+=1;
	  col_score[cls][1]++;
	}

      /* turn the column counts into fractions, accumulate, then reset */
      for (cls=0; cls<2; cls++)
	{
	  if (col_score[cls][1]!=0) col_score[cls][0]/=col_score[cls][1];
	  total[cls][0]+=col_score[cls][0];
	  total[cls][1]++;
	  col_score[cls][0]=col_score[cls][1]=0;
	}
    }

  fprintf ( stdout, "\nn\t 1: %.2f 0: %.2f", total[1][0],total[0][0]);
  return ( total[1][0]>=total[0][0])?1:0;
}
      

/* Nearest-neighbour SAR prediction driver.
 *
 * inA  : sequence alignment.
 * inS  : SAR alignment, one row per compound; a row holds one character per
 *        sequence of inA, where 'I' and 'O' are the two values tested below.
 * mode : parameter string read with strget_param (_TARGET_, _SUBSET_, _JACK_,
 *        _T_, _FILTER_, _DISPLAY_).
 *
 * For every compound and every sequence to predict, each registered filter
 * (filter_aln4sar0..3) produces a filtered alignment whose identity matrix is
 * used to find the most similar reference sequence in each class; the sequence
 * is predicted positive when its best non-'O' neighbour beats its best 'O'
 * neighbour. Results are printed on stdout, either as CSV (_DISPLAY_ 1) or as
 * one line per prediction (_DISPLAY_ 0).
 *
 * The function terminates the process with exit(EXIT_SUCCESS); it never
 * returns to its caller.
 */
Alignment * sar_analyze (Alignment *inA, Alignment *inS, char *mode)
{
  int ***sim,***glob_results, ***comp_results;
  int *count;
  int a,b,c,m;
  float *tot2;
  Alignment *A=NULL,*S=NULL,*F, *SUBSET;
  char *subset, *target;
  int jack, T, filter;
  filter_func *ff;
  int n_methods=0;
  char *prediction, *reliability;
  int pred_start=0, pred_end, ref_start=0, ref_end;
  int display, CSV=1, NONCSV=0;
  

  /* Table of column filters; only the first four are registered. */
  ff=vcalloc (6,sizeof (filter_func));
  ff[n_methods++]=filter_aln4sar0;
  ff[n_methods++]=filter_aln4sar1;
  ff[n_methods++]=filter_aln4sar2;
  ff[n_methods++]=filter_aln4sar3;
  /*
    ff[n_methods++]=filter_aln4sar4;
    ff[n_methods++]=filter_aln4sar5;
  */
  /* One similarity matrix per method, all NULL to start with. */
  sim=vcalloc (n_methods, sizeof (int**));
  

  /* tot2 is allocated but not used in the rest of this function. */
  tot2=vcalloc ( 10, sizeof (float));
  subset=vcalloc ( 100, sizeof (char));
  target=vcalloc ( 100, sizeof (char));
  
  /* T and filter are read here but not consulted afterwards. */
  strget_param (mode, "_TARGET_", "no", "%s_", target);
  strget_param (mode, "_SUBSET_", "no", "%s_", subset);
  strget_param (mode, "_JACK_", "0", "%d", &jack);
  strget_param (mode, "_T_", "0", "%d", &T);
  strget_param (mode, "_FILTER_", "11", "%d", &filter);
  strget_param (mode, "_DISPLAY_", "0", "%d", &display);
  
  

  /* Optional target alignment: stacked onto the reference alignment, so that */
  /* inA holds more sequences than the SAR rows describe.                      */
  if ( !strm (target, "no"))
    {
      Alignment *T;
      T=main_read_aln(target, NULL);
      if ( T->len_aln !=inA->len_aln )
      {
        printf_exit ( EXIT_FAILURE,stderr, "Error: %s is incompatible with the reference alignment [FATAL:%s]",target,PROGRAM);
      }
      
      inA=stack_aln (inA, T);
      
    }

  /* Optional restriction of both alignments to a subset. */
  if ( !strm(subset, "no")) 
    {
      SUBSET=main_read_aln (subset, NULL);
      sarset2subsarset ( inA, inS, &A, &S, SUBSET);
    }
  else
    {
      A=inA;
      S=inS;
    }
  

  prediction=vcalloc ( n_methods+1, sizeof (char));
  reliability=vcalloc ( n_methods+1, sizeof (char));
  
  /* [row][pred][real] counters: rows 0..n_methods-1 are the single methods, */
  /* rows n_methods..2*n_methods-1 the combined votes.                        */
  glob_results=declare_arrayN(3, sizeof (int), n_methods*2, 2, 2);

  /* count[a]: number of 'I' in the SAR row of compound a */
  count=vcalloc (S->nseq, sizeof (int));
  for (a=0; a<S->nseq; a++)
    {
      int l;
      l=strlen (S->seq_al[a]);
      for ( b=0; b<l; b++)
      count[a]+=(S->seq_al[a][b]=='I')?1:0;
    }
  /* CSV header line.                                                          */
  /* NOTE(review): a equals S->nseq at this point, so S->name[a] and count[a] */
  /* are read one past the last compound -- looks like a bug, to be confirmed. */
  if ( display==CSV)
    {fprintf ( stdout, "\nCompound %s ; Ntargets %d", S->name[a],count[a]);
      pred_start=(strlen (S->seq_al[0])==A->nseq)?0:strlen (S->seq_al[0]);
      pred_end=A->nseq;
      for (a=pred_start; a< pred_end; a++)
      fprintf ( stdout, ";%s", A->name[a]);
      fprintf ( stdout, ";npred;");
    }
  
  
  for (a=0; a<S->nseq; a++)
    {
      int n_pred;
      /* Per-compound counters; not released in this function. */
      comp_results=declare_arrayN(3, sizeof (int), n_methods*2, 2, 2);

      /* When the SAR row covers every sequence, all of them are predicted;   */
      /* otherwise only the sequences beyond the end of the SAR row are.      */
      pred_start=(strlen (S->seq_al[a])==A->nseq)?0:strlen (S->seq_al[a]);
      pred_end=A->nseq;
      if ( display==CSV)fprintf ( stdout, "\n%s;%d", S->name[a],count[a]);
      
      for (n_pred=0,b=pred_start; b<pred_end;b++)
      {
        int t, score=0,pred, real;
        
        if ( display==NONCSV)fprintf ( stdout, "\n>%-15s %10s %c ", S->name[a], A->name[b], (pred_start==0)?S->seq_al[a][b]:'?');
        /* Similarity matrices are rebuilt for every sequence in jack-knife   */
        /* mode (sequence b is left out), once per compound otherwise.        */
        if (jack || b==pred_start)
          {
            for (m=0; m<n_methods; m++)
            {
              free_int (sim[m], -1);
              F=(ff[m]) (A,S,a,(jack==0)?-1:b, mode);
              sim[m]=aln2sim_mat(F, "idmat");
              free_aln (F);
            }
          }
        
        for (m=0; m<n_methods; m++)
          {
            int Nbsim=0,Ybsim=0,bsim=0;
            ref_start=0;
            /* NOTE(review): the row length is taken from S->seq_al[m], m    */
            /* being a method index; S->seq_al[a] may have been intended.    */
            ref_end=strlen (S->seq_al[m]);
            
            /* Nbsim: best similarity to an 'O' reference, Ybsim: to any other */
            for (c=ref_start;c<ref_end; c++)
            {
              if ( b==c) continue;
              else if ( S->seq_al[a][c]=='O')
                {
                  Nbsim=MAX(Nbsim,sim[m][b][c]);
                }
              else 
                {
                  Ybsim=MAX(Ybsim,sim[m][b][c]);
                }
            }
            
            bsim=(Ybsim>Nbsim)?Ybsim:-Nbsim;
            pred=(bsim>0)?1:0;
            real=(S->seq_al[a][b]=='O')?0:1;
            comp_results[m][pred][real]++;
            glob_results[m][pred][real]++;
            score+=pred;
            prediction[m]=pred+'0';
            reliability[m]=(FABS((Ybsim-Nbsim))-1)/10+'0';
          }
        
        if ( score>0)n_pred++;
        /* m equals n_methods here: terminate both per-method strings */
        prediction[m]=reliability[m]='\0';
        if (display==NONCSV)fprintf ( stdout, "Compound_Count:%d primary_predictions: %s Total: %d", count[a],prediction, score);
        else if ( display==CSV)fprintf ( stdout, ";%d", score);
        /* Combined rows: row t+n_methods predicts positive when more than t  */
        /* methods voted positive.                                             */
        for (t=0; t<n_methods; t++)
          {
            if (score>t)
            {
              comp_results[t+n_methods][1][real]++;
              glob_results[t+n_methods][1][real]++;
            }
            else 
            {
              comp_results[t+n_methods][0][real]++;
              glob_results[t+n_methods][0][real]++;
            }
          }
      }
      if ( display==NONCSV)
      {if ( pred_start==0)display_prediction (comp_results, S,a, n_methods*2);}
      else fprintf (stdout, ";%d;",n_pred);
    }
  if ( display==NONCSV)if (pred_start==0)display_prediction (glob_results, S,-1, n_methods*2);
      
  
  /* The process ends here; nothing is returned to the caller. */
  exit (EXIT_SUCCESS);
}
float display_prediction (int ***count, Alignment *S, int c, int n)
{
  /* Prints one line of statistics per row of count[row][pred][real]:
   *   sp  = tn/(tn+fp), sn = tp/(tp+fn), sn2 = tp/(tp+fp).
   * The first n/2 rows are labelled as single methods, the others as combined
   * thresholds. c selects the compound name, -1 meaning "TOTAL".
   * Always returns 0.
   */
  int row;
  int n_single=n/2;

  for (row=0; row<n; row++)
    {
      float true_pos=count[row][1][1];
      float true_neg=count[row][0][0];
      float false_pos=count[row][1][0];
      float false_neg=count[row][0][1];
      float sn2=true_pos/(true_pos+false_pos);
      float sn=true_pos/(true_pos+false_neg);
      float sp=true_neg/(true_neg+false_pos);

      if (row>=n_single)
	fprintf ( stdout, "\n>#Combined: T=%d Compound %15s sp=%.2f sn=%.2f sn2=%.2f",row-n_single, (c==-1)?"TOTAL":S->name[c],sp, sn, sn2 );
      else
	fprintf ( stdout, "\n>#Method %d Compound %15s sp=%.2f sn=%.2f sn2=%.2f",row, (c==-1)?"TOTAL":S->name[c],sp, sn, sn2 );
    }
  fprintf ( stdout, "\n");
  return 0;
}
 
float display_prediction_2 (int **prediction, int n,Alignment *A, Alignment *S, int field)
{
  /* Sorts the n prediction records on `field` (sort_int_inv), stores running
   * totals of column 3 in columns 17 (from the top, current row included) and
   * 18 (from the bottom, current row excluded), then scans the cut-offs for
   * the one giving the highest sn among those with sp above 0.8.
   * The selected cut-off and its sn/sp are printed; sn is returned.
   * Columns 17 and 18 are scratch space, hence they are rejected as `field`.
   * A and S are not used.
   */
  int row, running, cutoff;
  float best_sn, best_sp;

  if ( field==17 || field ==18)
    {
      printf_exit ( EXIT_FAILURE, stderr, "\nERROR: Do not use filed %d in display_prediction", field);
    }

  sort_int_inv ( prediction, 10,field, 0, n-1);

  running=0;
  for (row=0; row<n; row++)
    {
      running+=prediction[row][3];
      prediction[row][17]=running;
    }

  running=0;
  for (row=n-1; row>=0; row--)
    {
      prediction[row][18]=running;
      running+=prediction[row][3];
    }

  best_sn=best_sp=cutoff=0;
  for (row=0; row<n; row++)
    {
      float tp, fn, fp, sp, sn;

      tp=prediction[row][17];
      fn=prediction[row][18];
      fp=(row+1)-tp;

      if ((tp+fp)==0) sp=0;
      else sp=tp/(tp+fp);

      if ((tp+fn)==0) sn=0;
      else sn=tp/(tp+fn);

      if (sp>0.8 && sn>best_sn)
	{
	  best_sn=sn;
	  best_sp=sp;
	  cutoff=prediction[row][field];
	}
    }

  /* the report is the same whether or not a cut-off was found */
  fprintf (stdout, "\n T =%d SN=%.2f SP= %.2f",cutoff,best_sn,best_sp);

  return best_sn;
}


/************************************************************************************/
/*                NEW      ANALYZE     : SAR                                        */
/************************************************************************************/
/* Forward declarations for the first generation of SAR analysis routines.      */
/* Only part of them is defined in the visible section of the file.             */

/* Predictors: score the sequences listed in ls[1] against those listed in      */
/* ls[0] and return a 2x2 table indexed [pred][real].                           */
float** cache2pred1 (Alignment *A,int**cache, int *ns, int **ls, Alignment *S, char *compound, char *mode);
float** cache2pred2 (Alignment *A,int**cache, int *ns, int **ls, Alignment *S, char *compound, char *mode);
float** cache2pred3 (Alignment *A,int**cache, int *ns, int **ls, Alignment *S, char *compound, char *mode);
float** cache2pred4 (Alignment *A,int**cache, int *ns, int **ls, Alignment *S, char *compound, char *mode);
float** cache2pred5 (Alignment *A,int**cache, int *ns, int **ls, Alignment *S, char *compound, char *mode);
float** cache2pred_new (Alignment *A,int**cache, int *ns, int **ls, Alignment *S, char *compound, char *mode);

/* Cache builders (presumably the producers of the `cache` argument of the      */
/* predictors above -- TODO confirm, their definitions are not visible here).   */
int **sar2cache_adriana ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode);
int **sar2cache_proba_old ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode);
int **sar2cache_count1 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode);
int **sar2cache_count2 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode);
int **sar2cache_count3 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode);

int **sar2cache_proba_new ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode);
int **sar2cache_proba2 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode);
int **analyze_sar_compound1 ( char *name, char *seq, Alignment *A, char *mode);
int **analyze_sar_compound2 ( char *name, char *seq, Alignment *A, char *mode);

int aln2n_comp_col ( Alignment *A, Alignment *S, int ci);

double evaluate_sar_score1 ( int len, int n11, int n1a, int n1b);
double evaluate_sar_score2 ( int len, int n11, int n1a, int n1b);


int ***simple_sar_analyze_vot ( Alignment *inA, Alignment *SAR, char *mode);
int ***simple_sar_analyze_col ( Alignment *inA, Alignment *SAR, char *mode);


/* sarset2subsarset is the routine sar_analyze calls to restrict A and S to a subset. */
int sarset2subsarset ( Alignment *A, Alignment *S, Alignment **subA, Alignment **subS, Alignment *SUB);
int benchmark_sar (int v);
int aln2jack_group1 (Alignment *A, int seq, int **l1, int *nl1, int **l2, int *nl2);
int aln2jack_group2 (Alignment *A, int seq, int **l1, int *nl1, int **l2, int *nl2);
int aln2jack_group3 (Alignment *A, char *sar_seq, int **l1, int *nl1, int **l2, int *nl2);
float** jacknife5 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode);
float** jacknife6 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode);

/* Both are defined right below. */
int process_cache ( Alignment *A,Alignment *S, int ***Cache, char *mode);
Alignment *analyze_compounds (Alignment *A, Alignment *S, char *mode);

Alignment *analyze_compounds (Alignment *A, Alignment *S, char *mode)
{
  /* For each compound (row of S), prints the mean pairwise similarity, taken
   * from the "idmat" matrix of A, over all pairs of sequences that are both
   * marked 'I' for that compound. `mode` is unused. Returns A unchanged.
   */
  int comp, first, second;
  int **sim;

  sim=aln2sim_mat (A, "idmat");
  for (comp=0; comp<S->nseq; comp++)
    {
      int n_pairs=0, sum=0;

      for (first=0; first<A->nseq-1; first++)
	{
	  if (S->seq_al[comp][first]!='I') continue;

	  for (second=first+1; second<A->nseq; second++)
	    {
	      if (S->seq_al[comp][second]=='I')
		{
		  sum+=sim[first][second];
		  n_pairs++;
		}
	    }
	}
      fprintf ( stdout, ">%-10s   CMPSIM: %.2f\n", S->name[comp],(float)sum/(float)n_pairs);
    }
  free_int (sim, -1);
  return A;
}

int print_seq_pos ( int pos, Alignment *A, char *seq);
int abl1_evaluation (int p);
int print_seq_pos ( int pos, Alignment *A, char *seq)
{
  /* Prints, for alignment column `pos`, the index of sequence `seq` in A and
   * the number of residues it has before that column, shifted by 246. For
   * the sequence named "ABL1" the abl1_evaluation() mark of that position is
   * printed as well. Always returns 0.
   */
  int col, n_res, seq_idx;

  seq_idx=name_is_in_list (seq, A->name, A->nseq, MAXNAMES);
  fprintf ( stdout, "S=%d", seq_idx);

  n_res=0;
  for (col=0; col<pos; col++)
    {
      if (is_gap (A->seq_al[seq_idx][col])) continue;
      n_res++;
    }

  fprintf ( stdout, "Pos %d SEQ %s: %d ", pos+1, seq, n_res+246);
  if ( strm ( seq, "ABL1"))
    {
      fprintf ( stdout , "PT: %d", abl1_evaluation (n_res+246));
    }
  return 0;
}

int process_cache ( Alignment *A,Alignment *S, int  ***Cache, char *mode)
{
  /* Prints a per-column summary of the caches of all compounds.
   *
   * Cache[a] is the cache of compound a; its row 26 is read as one value per
   * alignment column. For every column the report gives the sum of these
   * values over the compounds, the number of compounds with a positive value,
   * the position in the "ABL1" sequence (shifted by 246) with its residue and
   * abl1_evaluation() mark, and one 0/1 flag per compound.
   * Always returns 1.
   *
   * Changes with respect to the previous version: the three work arrays are
   * released before returning (they used to leak on every call) and the
   * unused read of Cache[0] is gone.
   *
   * NOTE(review): the index returned by name_is_in_list for "ABL1" is used
   * without checking that the sequence was found -- confirm what the helper
   * returns on a miss.
   */
  int a, b;
  int **pos, **pos2;
  int **C;
  int ab1, *ab1_pos;
  int weight_mode;

  /* parsed for compatibility with the mode string; not used below */
  strget_param ( mode, "_WEIGHT_", "1", "%d", &weight_mode);

  pos=declare_int(A->len_aln+1,2);          /* [col][0]: sum, [col][1]: number of compounds */
  pos2=declare_int (A->len_aln+1,S->nseq);  /* [col][compound]: 1 when the value is positive */
  for (a=0; a<S->nseq; a++)
    {
      C=Cache[a];
      for (b=0; b< A->len_aln; b++)
	{
	  pos[b][0]+=C[26][b];
	  if ( C[26][b]>0)
	    {
	      pos[b][1]++;
	      pos2[b][a]=1;
	    }
	}
    }

  ab1=name_is_in_list ("ABL1", A->name, A->nseq,100);
  ab1_pos=vcalloc (A->len_aln+1, sizeof (int));

  /* residue number of every column in ABL1, -1 on gaps */
  for ( b=0,a=0; a< A->len_aln; a++)
    {
      if ( A->seq_al[ab1][a]=='-')ab1_pos[a]=-1;
      else ab1_pos[a]=++b;
    }

  for ( a=0; a< A->len_aln; a++)
    {
      fprintf ( stdout, "\n%4d %5d %5d %5d [%c] [%2d] ALN", a+1, pos[a][0], pos[a][1], ab1_pos[a]+246,A->seq_al[ab1][a],abl1_evaluation (ab1_pos[a]+246));
      for ( b=0; b< S->nseq; b++)fprintf ( stdout, "%d", pos2[a][b]);
    }

  free_int (pos, -1);
  free_int (pos2, -1);
  vfree (ab1_pos);
  return 1;
}
int abl1_evaluation (int p)
{
  /* Hard-coded mark attached to position p of ABL1.
   * Listed positions return their mark (0 to 10); any other position above
   * 400 returns 0 and every remaining position returns -1.
   */
  switch (p)
    {
    case 248: case 250: case 253: case 254: case 256:
    case 299: case 315: case 318: case 319: case 321:
    case 364: case 368: case 370: case 382: case 384:
    case 387:
      return 10;

    case 255: case 314:
      return 9;

    case 258: case 269: case 294: case 378: case 395:
    case 398: case 399: case 400: case 416:
      return 8;

    case 257: case 355: case 419:
      return 5;

    case 291:
      return 4;

    case 306: case 323: case 324: case 339: case 340:
    case 366: case 372: case 403:
      return 0;

    default:
      break;
    }

  return (p>400)?0:-1;
}
float** cache2pred1 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode)
{
  /* Predicts the sequences listed in ls[1] from those listed in ls[0].
   *
   * A target sequence is compared, column by column, with every reference
   * sequence that is not marked 'O' for `compound` and whose cache entry is
   * non-zero on a non-gap residue. Each such residue adds a weight to the
   * maximum score, and to the score itself when it matches the target. The
   * weight is the cache value when _WEIGHT_ is 1 and the value is positive,
   * 1 otherwise. The target is predicted positive when score/max exceeds
   * _THR_ (default 0.09).
   *
   * One line is printed per target, followed by the fraction of columns and
   * residues used. Returns a 2x2 table of counts indexed [pred][real].
   */
  float **result;
  float threshold;
  int weight_mode;
  int comp_idx;
  int t, k, col;
  int n_used_cols=0, n_used_res=0, n_unused_res=0;

  strget_param ( mode, "_THR_", "0.09", "%f", &threshold);
  strget_param ( mode, "_WEIGHT_", "0", "%d", &weight_mode);

  result=declare_float (2, 2);
  comp_idx=name_is_in_list ( compound, S->name, S->nseq, -1);

  for (t=0; t<ns[1]; t++)
    {
      int target=ls[1][t];
      int predicted, observed;
      double matched=0, possible=0;

      for (col=0; col<A->len_aln; col++)
	{
	  int target_res=tolower (A->seq_al[target][col]);

	  for (k=0; k<ns[0]; k++)
	    {
	      int ref=ls[0][k];
	      int ref_res, weight;

	      if ( S->seq_al[comp_idx][ref]=='O') continue;
	      if ( cache[ref][col]==0 && !is_gap( A->seq_al[ref][col])) continue;

	      ref_res=tolower ( A->seq_al[ref][col]);
	      if ( is_gap(ref_res)) continue;

	      weight=1;
	      if (cache[ref][col]>0 && weight_mode==1) weight=cache[ref][col];

	      possible+=weight;
	      if (ref_res==target_res) matched+=weight;
	    }
	}

      predicted=(( matched/possible) >threshold)?1:0;
      observed=(S->seq_al[comp_idx][target]=='I')?1:0;
      result[predicted][observed]++;

      fprintf ( stdout, "\n>%s %d%d SCORE %.2f C %s [SEQ]\n", A->name[target],observed, predicted, (float)matched/(float)possible, compound);
    }

  /* Usage statistics over the reference sequences */
  for (col=0; col<A->len_aln; col++)
    {
      int col_is_used=0;

      for (k=0; k<ns[0]; k++)
	{
	  int ref=ls[0][k];

	  if (is_gap(A->seq_al[ref][col])) continue;
	  if (cache[ref][col]==0) n_unused_res++;
	  else
	    {
	      col_is_used=1;
	      n_used_res++;
	    }
	}
      n_used_cols+=col_is_used;
    }
  fprintf ( stdout, "\n>%s USED_POSITIONS: COL: %.2f RES: %.2f COMP\n", S->name[comp_idx],  (float)n_used_cols/(float)A->len_aln, (float)n_used_res/(float) n_unused_res);

  return result;
}

float** cache2pred2 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode)
{
  /* Predicts the sequences listed in ls[1] from those listed in ls[0].
   *
   * Same scheme as cache2pred1 except that the cache value itself is always
   * used as the weight and that the default _THR_ is 0.5. For each target the
   * residues of the columns that contributed are printed, followed by its
   * score, maximum score, ratio and real class; the fraction of columns and
   * residues used closes the report.
   * Returns a 2x2 table of counts indexed [pred][real].
   */
  float **result;
  float threshold;
  int comp_idx;
  int t, k, col;
  int n_used_cols=0, n_used_res=0, n_unused_res=0;

  strget_param ( mode, "_THR_", "0.5", "%f", &threshold);

  result=declare_float (2, 2);
  comp_idx=name_is_in_list ( compound, S->name, S->nseq, -1);

  for (t=0; t<ns[1]; t++)
    {
      int target=ls[1][t];
      int predicted, observed;
      double matched=0, possible=0;

      fprintf ( stdout, "\n");
      for (col=0; col<A->len_aln; col++)
	{
	  int contributed=0;
	  int target_res=tolower (A->seq_al[target][col]);

	  for (k=0; k<ns[0]; k++)
	    {
	      int ref=ls[0][k];
	      int ref_res, weight;

	      if ( S->seq_al[comp_idx][ref]=='O') continue;
	      if ( cache[ref][col]==0 && !is_gap( A->seq_al[ref][col])) continue;

	      ref_res=tolower ( A->seq_al[ref][col]);
	      if ( is_gap(ref_res)) continue;

	      weight=cache[ref][col];
	      if (ref_res==target_res) matched+=weight;
	      possible+=weight;
	      contributed=1;
	    }
	  if (contributed) fprintf ( stdout, "%c", target_res);
	}

      predicted=(( matched/possible) >threshold)?1:0;
      observed=(S->seq_al[comp_idx][target]=='I')?1:0;
      result[predicted][observed]++;

      fprintf ( stdout, "PSEQ: %-10s SC: %4d MAX: %4d S: %.2f R: %4d", A->name[target],(int)matched, (int)possible, (float)matched/possible,observed);
    }

  /* Usage statistics over the reference sequences */
  for (col=0; col<A->len_aln; col++)
    {
      int col_is_used=0;

      for (k=0; k<ns[0]; k++)
	{
	  int ref=ls[0][k];

	  if (is_gap(A->seq_al[ref][col])) continue;
	  if (cache[ref][col]==0) n_unused_res++;
	  else
	    {
	      col_is_used=1;
	      n_used_res++;
	    }
	}
      n_used_cols+=col_is_used;
    }
  fprintf ( stdout, "\n>%s USED_POSITIONS: COL: %.2f RES: %.2f COMP\n", S->name[comp_idx],  (float)n_used_cols/(float)A->len_aln, (float)n_used_res/(float) n_unused_res);

  return result;
}

float** cache2pred3 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode)
{
  /* Predicts the sequences listed in ls[1] from those listed in ls[0].
   *
   * Targets are scored as in cache2pred2, then ranked by decreasing score.
   * The ranked list is printed and cut where the number of predicted
   * positives is closest to the number of real positives, which yields the
   * returned 2x2 table indexed [pred][real]. _THR_ is parsed from `mode` but
   * plays no part in the result.
   */
  float **result;
  float threshold;
  int comp_idx;
  int t, k, col, rank;
  int n_real;
  int tp, fp, fn, tn;
  int best_tp, best_fp;
  int delta, best_delta;
  int **ranked;   /* [0]: real class, [1]: 1000*score/max, [2]: sequence index */

  strget_param ( mode, "_THR_", "0.5", "%f", &threshold);

  result=declare_float (2, 2);
  comp_idx=name_is_in_list ( compound, S->name, S->nseq, -1);
  ranked=declare_int ( ns[1],3);

  for (t=0; t<ns[1]; t++)
    {
      int target=ls[1][t];
      double matched=0, possible=0;

      for (col=0; col<A->len_aln; col++)
	{
	  int target_res=tolower (A->seq_al[target][col]);

	  for (k=0; k<ns[0]; k++)
	    {
	      int ref=ls[0][k];
	      int ref_res, weight;

	      if ( S->seq_al[comp_idx][ref]=='O') continue;
	      if ( cache[ref][col]==0 && !is_gap( A->seq_al[ref][col])) continue;

	      ref_res=tolower ( A->seq_al[ref][col]);
	      if ( is_gap(ref_res)) continue;

	      weight=cache[ref][col];
	      if (ref_res==target_res) matched+=weight;
	      possible+=weight;
	    }
	}

      ranked[t][0]=(S->seq_al[comp_idx][target]=='I')?1:0;
      ranked[t][1]=(int)((matched/possible)*(float)1000);
      ranked[t][2]=target;
    }
  sort_int_inv (ranked, 3, 1, 0, ns[1]-1);

  for (rank=0; rank<ns[1]; rank++)
    {
      fprintf ( stdout, "PSEQ: %-10s SC: %5d R: %4d\n", A->name[ranked[rank][2]],ranked[rank][0], ranked[rank][1]);
    }

  n_real=0;
  for (rank=0; rank<ns[1]; rank++) n_real+=ranked[rank][0];

  /* walk down the ranking and remember the cut closest to n_real predictions */
  best_delta=100000;
  best_tp=best_fp=0;
  tp=fp=0;
  for (rank=0; rank<ns[1]; rank++)
    {
      tp+=ranked[rank][0];
      fp+=1-ranked[rank][0];
      delta=(n_real-(tp+fp));
      if (FABS(delta)<best_delta)
	{
	  best_delta=delta;
	  best_tp=tp;
	  best_fp=fp;
	}
    }

  tp=best_tp;
  fp=best_fp;
  fn=n_real-tp;
  tn=ns[1]-(tp+fp+fn);
  result[1][1]=tp;
  result[1][0]=fp;
  result[0][1]=fn;
  result[0][0]=tn;
  free_int (ranked, -1);
  return result;
}
/*
 * cache2pred4
 *
 * Nearest-neighbour style prediction restricted to the columns flagged in
 * cache: a percent-identity matrix is computed over the flagged columns, and
 * each test sequence ls[1][*] is scored by its highest identity to a training
 * sequence ls[0][*] that is flagged 'I' for the compound. Test sequences are
 * ranked by that score and a 2x2 table R[pred][real] is returned.
 *
 * cache is read as cache[seq][col] for seq < A->nseq. A column is recorded
 * once for every sequence with a non-zero entry, so a column flagged by
 * several sequences counts several times in the identity.
 *
 * Returns a table from declare_float(2,2); the caller owns it.
 *
 * NOTE(review): sort_int_inv is presumed to sort rows in decreasing order of
 * the given field - confirm against its definition.
 */
float** cache2pred4 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode)
{
  int s1, s2, seq1, seq2, ci, a,b, c, n;
  double score;
  float **R;

  int tp, tn, fn, fp;
  int best_tp, best_fp;
  int delta, best_delta;
  int **list;
  int **sim;
  int *ul;
  int nused=0;

  /*Predict on ns[1] what was trained on ns[0]*/
  /*Identify interesting columns*/
  /* Up to len_aln*nseq entries can be appended below; the buffer used to hold
     only len_aln of them. The +1 keeps the request non-empty. */
  ul=vcalloc ( A->len_aln*A->nseq+1, sizeof (int));
  for (a=0; a< A->len_aln; a++)
    for ( b=0; b< A->nseq; b++)
      if ( cache[b][a])ul[nused++]=a;

  /*compute the similarity on the used columns*/

  R=declare_float (2, 2);
  sim=declare_int (A->nseq, A->nseq);
  for (a=0; a< A->nseq; a++)
    for ( b=0; b< A->nseq; b++)
      {
	for (c=0; c< nused; c++)
	  {
	    if ( A->seq_al[a][ul[c]]==A->seq_al[b][ul[c]])sim[a][b]++;
	  }
	/* with no flagged column the identity stays 0 instead of dividing by 0 */
	if (nused>0)sim[a][b]=(sim[a][b]*100)/nused;
      }
  vfree (ul);

  ci=name_is_in_list ( compound, S->name, S->nseq, -1);
  list=declare_int ( ns[1],2);

  for (s1=0; s1<ns[1]; s1++)
    {
      seq1=ls[1][s1];

      for (score=0,s2=0; s2<ns[0]; s2++)
	{
	  seq2=ls[0][s2];

	  if ( seq1==seq2)continue;
	  if (S->seq_al[ci][seq2]=='I')score=MAX(score, sim[seq1][seq2]);
	}
      list[s1][0]=(S->seq_al[ci][seq1]=='I')?1:0;
      list[s1][1]=(int)score;
    }
  sort_int_inv (list, 2, 1, 0, ns[1]-1);

  /* n = number of real positives; keep the ranked prefix whose size is
     closest to n. */
  for (n=0, a=0; a<ns[1]; a++)n+=list[a][0];
  for (best_delta=100000,best_tp=0,tp=0,fp=0,best_fp=0,a=0; a<ns[1]; a++)
    {
      tp+=list[a][0];
      fp+=1-list[a][0];
      delta=(n-(tp+fp));
      if (FABS(delta)<best_delta)
	{
	  /* store the magnitude: it is what the test above compares against */
	  best_delta=FABS(delta);
	  best_tp=tp;
	  best_fp=fp;
	}
    }
  /*R[pred][real]*/
  tp=best_tp;
  fp=best_fp;
  fn=n-tp;
  tn=ns[1]-(tp+fp+fn);
  R[1][1]=tp;
  R[1][0]=fp;
  R[0][1]=fn;
  R[0][0]=tn;
  free_int (list, -1);
  free_int (sim, -1);
  return R;
}

/*
 * cache2pred5
 *
 * Scores each test sequence ls[1][*] by its highest similarity (matrix built
 * with aln2sim_mat(A, "idmat")) to a training sequence ls[0][*] flagged 'I'
 * for the compound, ranks the test sequences on that score and returns a 2x2
 * table R[pred][real]. The cache and mode arguments are not read.
 *
 * The similarity matrix is built on the first call only and kept in a static
 * pointer; later calls reuse it whatever alignment they are given.
 *
 * NOTE(review): sort_int_inv is presumed to sort rows in decreasing order of
 * the given field - confirm against its definition.
 */
float** cache2pred5 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode)
{
  static int **sim;
  float **R;
  int **ranked;
  int ci, k, t;
  int n_pos, hits, misses;
  int top_hits, top_misses;
  int gap, top_gap;

  /*Predict on ns[1] what was trained on ns[0]*/
  R=declare_float (2, 2);

  if ( !sim)
    {
      sim=aln2sim_mat (A, "idmat");
    }

  ci=name_is_in_list ( compound, S->name, S->nseq, -1);
  ranked=declare_int ( ns[1],2);

  k=0;
  while (k<ns[1])
    {
      int query=ls[1][k];
      double best=0;

      for (t=0; t<ns[0]; t++)
	{
	  int ref=ls[0][t];

	  if ( ref!=query && S->seq_al[ci][ref]=='I')
	    {
	      best=MAX(best, sim[query][ref]);
	    }
	}
      ranked[k][0]=(S->seq_al[ci][query]=='I')?1:0;
      ranked[k][1]=(int)best;
      k++;
    }
  sort_int_inv (ranked, 2, 1, 0, ns[1]-1);

  /* number of real positives among the test sequences */
  n_pos=0;
  for (k=0; k<ns[1]; k++)
    {
      n_pos+=ranked[k][0];
    }

  /* scan the ranking and remember the prefix selected by the delta test */
  top_gap=100000;
  top_hits=0;
  top_misses=0;
  hits=0;
  misses=0;
  for (k=0; k<ns[1]; k++)
    {
      hits+=ranked[k][0];
      misses+=1-ranked[k][0];
      gap=n_pos-(hits+misses);
      if (FABS(gap)<top_gap)
	{
	  top_gap=gap;
	  top_hits=hits;
	  top_misses=misses;
	}
    }

  /*R[pred][real]*/
  R[1][1]=top_hits;
  R[1][0]=top_misses;
  R[0][1]=n_pos-top_hits;
  R[0][0]=ns[1]-(top_hits+top_misses+(n_pos-top_hits));
  free_int (ranked, -1);
  return R;
}

/*
 * jacknife5
 *
 * Leave-one-out evaluation. For every sequence a of A:
 *   - ns/ls are overwritten so that ls[0] holds all other sequences
 *     (ns[0]=A->nseq-1) and ls[1][0]=a (ns[1]=1);
 *   - a cache is built on the training set with sar2cache_count1 and its
 *     rows 0..26 are accumulated into cacheIN;
 *   - the left-out sequence is scored by summing cache[residue][col] over the
 *     columns where cache[26][col] is non-zero, then expressed as a
 *     percentage of the sum of cache[26][col].
 * The sequences are ranked on that percentage and a 2x2 table R[pred][real]
 * is returned. One line per sequence is written to stdout.
 *
 * The caller's ns and ls arrays are modified; ls[0] must be able to hold
 * A->nseq-1 entries.
 *
 * NOTE(review): sort_int_inv is presumed to sort rows in decreasing order of
 * the given field - confirm against its definition.
 */
float** jacknife5 (Alignment*A,int **cacheIN, int *ns, int **ls, Alignment *S, char *compound, char *mode)
{
  int seq1, ci, a,b, c, n;
  double score, max_score, ratio;
  float **R;

  int tp, tn, fn, fp;
  int best_tp, best_fp;
  int delta, best_delta;
  int **list;
  int **cache;

  ci=name_is_in_list ( compound, S->name, S->nseq, -1);
  list=declare_int (A->nseq,2);
  R=declare_float (2, 2);

  for ( a=0; a<A->nseq; a++)
    {
      int real, res;

      ns[0]=A->nseq-1;
      ns[1]=1;
      for (c=0,b=0; b<A->nseq; b++)
	if (a!=b)ls[0][c++]=b;
      ls[1][0]=a;

      cache=sar2cache_count1 (A, ns, ls,S, compound, mode);
      for (b=0; b<=26; b++)
	for ( c=0; c< A->len_aln; c++)
	  cacheIN[b][c]+=cache[b][c];

      seq1=a;
      real=(S->seq_al[ci][seq1]=='I')?1:0;
      fprintf ( stdout, ">%-10s %d ", A->name[seq1], real);

      for (max_score=0,b=0; b<A->len_aln; b++)
	max_score+=cache[26][b];

      for (score=0,b=0; b<A->len_aln; b++)
	{
	  res=tolower (A->seq_al[seq1][b]);
	  if ( cache[26][b]==0) continue;
	  if ( !is_gap(res))
	    {
	      score+=cache[res-'a'][b];
	    }
	}

      /* An empty cache gives max_score==0: report 0 instead of dividing by
	 zero and converting the resulting NaN to an int. */
      ratio=(max_score!=0)?((score*100)/max_score):0;

      fprintf ( stdout, " SCORE: %5d SPRED %d RATIO: %.2f \n", (int)score, a, ratio);
      list[a][0]=real;
      /* the SIMTEST and default modes used the same expression here */
      list[a][1]=(int)ratio;
      free_int (cache, -1);
    }

  sort_int_inv (list, 2, 1, 0, A->nseq-1);

  /* n = number of real positives; keep the ranked prefix whose size is
     closest to n. */
  for (n=0, a=0; a<A->nseq; a++)
    {
      n+=list[a][0];
    }

  for (best_delta=100000,best_tp=0,tp=0,fp=0,best_fp=0,a=0; a<A->nseq; a++)
    {
      tp+=list[a][0];
      fp+=1-list[a][0];
      delta=(n-(tp+fp));
      if (FABS(delta)<best_delta)
	{
	  /* store the magnitude: it is what the test above compares against */
	  best_delta=FABS(delta);
	  best_tp=tp;
	  best_fp=fp;
	}
    }

  /*R[pred][real]*/
  tp=best_tp;
  fp=best_fp;
  fn=n-tp;
  tn=A->nseq-(tp+fp+fn);
  R[1][1]=tp;
  R[1][0]=fp;
  R[0][1]=fn;
  R[0][0]=tn;
  free_int (list, -1);

  return R;
}
/*
 * jacknife6
 *
 * Leave-one-out evaluation based on sar2cache_proba_new. For every sequence
 * a of A:
 *   - ns/ls are overwritten so that ls[0] holds all other sequences and
 *     ls[1][0]=a;
 *   - a cache is built on the training set (the cache argument passed by the
 *     caller is not read; the parameter is reused as a local);
 *   - for each selected column (cache[26][col]!=0) the weights of the residues
 *     observed in at least one 'I' training sequence are copied to new_cache;
 *   - the left-out sequence is scored by summing new_cache[residue][col].
 * The sequences are ranked on that score and a 2x2 table R[pred][real] is
 * returned. One line per sequence goes to stdout and the selected tp/fp
 * counts go to stderr.
 *
 * The caller's ns and ls arrays are modified; ls[0] must be able to hold
 * A->nseq-1 entries.
 *
 * NOTE(review): sort_int_inv is presumed to sort rows in decreasing order of
 * the given field - confirm against its definition.
 */
float** jacknife6 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode)
{
  int seq1, ci, a,b, c,d,e,f, n;
  double score;
  float **R;

  int tp, tn, fn, fp;
  int best_tp, best_fp;
  int delta, best_delta;
  int **list;

  ci=name_is_in_list ( compound, S->name, S->nseq, -1);
  /* one row per sequence: list is indexed by sequence and sorted over
     0..nseq-1 (it used to be sized by the alignment length) */
  list=declare_int (A->nseq,2);
  R=declare_float (2, 2);

  for ( a=0; a<A->nseq; a++)
    {
      int sar, res;
      int **new_cache;

      ns[0]=A->nseq-1;
      ns[1]=1;
      for (c=0,b=0; b<A->nseq; b++)
	if (a!=b)ls[0][c++]=b;
      ls[1][0]=a;

      cache=sar2cache_proba_new (A, ns, ls,S, compound, mode);

      new_cache=declare_int (27,A->len_aln);

      for (d=0; d< A->len_aln; d++)
	{
	  int **analyze;
	  if ( cache[26][d]==0)continue;
	  /* analyze[residue][sar]: residue counts among 'I' (1) and other (0)
	     training sequences in this column */
	  analyze=declare_int (26, 2);

	  for ( e=0; e< ns[0]; e++)
	    {
	      f=ls[0][e];
	      sar=(S->seq_al[ci][f]=='I')?1:0;
	      res=tolower (A->seq_al[f][d]);

	      if ( res=='-') continue;
	      analyze[res-'a'][sar]++;
	    }
	  for (e=0;e<26; e++)
	    {
	      if ( analyze[e][1]){new_cache[26][d]=1;new_cache[e][d]+=cache[e][d];}
	    }
	  free_int (analyze, -1);
	}

      seq1=a;
      sar=(S->seq_al[ci][seq1]=='I')?1:0;
      fprintf ( stdout, ">%-10s %d ", A->name[seq1], sar);

      for (score=0,b=0; b<A->len_aln; b++)
	{
	  res=tolower (A->seq_al[seq1][b]);
	  if ( cache[26][b]==0) continue;
	  if ( !is_gap(res))
	    {
	      score+=new_cache[res-'a'][b];
	    }
	}
      fprintf ( stdout, " SCORE: %5d SPRED\n", (int)score);
      list[seq1][0]=sar;
      list[seq1][1]=(int)score;

      free_int (new_cache, -1);
      free_int (cache, -1);
    }
  sort_int_inv (list, 2, 1, 0, A->nseq-1);

  /* n = number of real positives; keep the ranked prefix whose size is
     closest to n. */
  for (n=0, a=0; a<A->nseq; a++)n+=list[a][0];
  for (best_delta=100000,best_tp=0,tp=0,fp=0,best_fp=0,a=0; a<A->nseq; a++)
    {
      tp+=list[a][0];
      fp+=1-list[a][0];
      delta=(n-(tp+fp));
      if (FABS(delta)<best_delta)
	{
	  /* store the magnitude: it is what the test above compares against */
	  best_delta=FABS(delta);
	  best_tp=tp;
	  best_fp=fp;
	}
    }

  fprintf ( stderr, "\n%d %d", best_tp, best_fp);
  /*R[pred][real]*/
  tp=best_tp;
  fp=best_fp;
  fn=n-tp;
  tn=A->nseq-(tp+fp+fn);
  R[1][1]=tp;
  R[1][0]=fp;
  R[0][1]=fn;
  R[0][0]=tn;
  free_int (list, -1);

  return R;
}
/*
 * cache2pred_new
 *
 * Scores each test sequence ls[1][*] by summing cache[residue][col] over the
 * columns where cache[26][col] is non-zero (gap positions add nothing), ranks
 * the test sequences on that score and returns a 2x2 table R[pred][real].
 * For every test sequence a line with its name, real class, the residues of
 * the selected columns and its score is written to stdout.
 *
 * NOTE(review): sort_int_inv is presumed to sort rows in decreasing order of
 * the given field - confirm against its definition.
 */
float** cache2pred_new (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode)
{
  float **R;
  int **ranked;
  int ci, k, col;
  int n_pos, hits, misses;
  int top_hits, top_misses;
  int gap, top_gap;

  ci=name_is_in_list ( compound, S->name, S->nseq, -1);
  ranked=declare_int ( ns[1],2);
  R=declare_float (2, 2);

  k=0;
  while (k<ns[1])
    {
      int query=ls[1][k];
      int is_in=(S->seq_al[ci][query]=='I')?1:0;
      double total=0;

      fprintf ( stdout, ">%-10s %d ", A->name[query], is_in);
      for (col=0; col<A->len_aln; col++)
	{
	  int aa=tolower (A->seq_al[query][col]);

	  if ( cache[26][col]!=0)
	    {
	      if ( !is_gap(aa))
		{
		  total+=cache[aa-'a'][col];
		}
	      fprintf ( stdout, "%c", aa);
	    }
	}
      fprintf ( stdout, " SCORE: %5d SPRED\n", (int)total);
      ranked[k][0]=is_in;
      ranked[k][1]=(int)total;
      k++;
    }

  sort_int_inv (ranked, 2, 1, 0, ns[1]-1);

  /* number of real positives among the test sequences */
  n_pos=0;
  for (k=0; k<ns[1]; k++)
    {
      n_pos+=ranked[k][0];
    }

  /* scan the ranking and remember the prefix selected by the delta test */
  top_gap=100000;
  top_hits=0;
  top_misses=0;
  hits=0;
  misses=0;
  for (k=0; k<ns[1]; k++)
    {
      hits+=ranked[k][0];
      misses+=1-ranked[k][0];
      gap=n_pos-(hits+misses);
      if (FABS(gap)<top_gap)
	{
	  top_gap=gap;
	  top_hits=hits;
	  top_misses=misses;
	}
    }

  /*R[pred][real]*/
  R[1][1]=top_hits;
  R[1][0]=top_misses;
  R[0][1]=n_pos-top_hits;
  R[0][0]=ns[1]-(top_hits+top_misses+(n_pos-top_hits));
  free_int (ranked, -1);

  return R;
}
/*
 * cache2pred_forbiden_res
 *
 * 1. Scores and prints every test sequence ls[1][*] with the incoming cache
 *    (diagnostic pass; the values stored in list are overwritten in step 4).
 * 2. For every selected column (cache[26][col]!=0) classifies each residue
 *    from the training sequences ls[0][*]: '1' seen in an 'I' sequence,
 *    '0' seen only in other sequences, '-' never seen. Residues of class '1',
 *    and residues with a positive matrix score against enough of them
 *    (keep[]>8), get +10 in new_cache, all others -10. Per-class average
 *    matrix scores are printed on the RSPRED lines.
 * 3. Prints the positively scoring residue pairs of the matrix (MATANALYZE).
 * 4. Re-scores the test sequences with new_cache, ranks them and returns a
 *    2x2 table R[pred][real].
 *
 * The substitution matrix is read once and kept for later calls, as the other
 * functions of this file do.
 *
 * NOTE(review): sort_int_inv is presumed to sort rows in decreasing order of
 * the given field - confirm against its definition.
 */
float** cache2pred_forbiden_res (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode)
{
  int s1,seq1, ci, a,b, c, n;
  double score;
  float **R;

  int tp, tn, fn, fp;
  int best_tp, best_fp;
  int delta, best_delta;
  int **list;
  int **new_cache;
  static int **mat;

  /* cached across calls instead of being re-read and leaked each time */
  if (!mat)mat=read_matrice ( "blosum62mt");

  ci=name_is_in_list ( compound, S->name, S->nseq, -1);
  list=declare_int ( ns[1],2);
  R=declare_float (2, 2);

  for (s1=0; s1<ns[1]; s1++)
    {
      int res, real;

      seq1=ls[1][s1];
      real=(S->seq_al[ci][seq1]=='I')?1:0;
      fprintf ( stdout, ">%-10s %d ", A->name[seq1], real);
      for (score=0,b=0; b<A->len_aln; b++)
	{
	  res=tolower (A->seq_al[seq1][b]);
	  if ( cache[26][b]==0) continue;
	  if ( !is_gap(res))
	    {
	      score+=cache[res-'a'][b];
	    }
	  fprintf ( stdout, "%c", res);
	}
      fprintf ( stdout, " SCORE: %5d SPRED\n", (int)score);
      list[s1][0]=real;
      list[s1][1]=(int)score;
    }
  new_cache=declare_int (27,A->len_aln);
  for (a=0; a< A->len_aln; a++)
    {
      int **analyze, real, res, d;
      int *res_type;
      int **sub;
      int *keep;

      /* test before allocating: the work buffers used to be allocated first
	 and leaked for every unselected column */
      if ( cache[26][a]==0)continue;

      keep=vcalloc ( 26, sizeof (int));
      res_type=vcalloc ( 26, sizeof (int));
      sub=declare_int (256, 2);
      analyze=declare_int (26, 2);
      for ( b=0; b< ns[0]; b++)
	{
	  seq1=ls[0][b];
	  real=(S->seq_al[ci][seq1]=='I')?1:0;
	  res=tolower (A->seq_al[seq1][a]);

	  if ( res=='-') continue;
	  analyze[res-'a'][real]++;
	}
      fprintf ( stdout, "RSPRED: ");
      for (c=0;c<26; c++)fprintf ( stdout, "%c", c+'a');
      fprintf ( stdout, "\nRSPRED: ");
      for (c=0;c<26; c++)
	{
	  if ( analyze[c][0] && analyze[c][1]){fprintf ( stdout, "1");res_type[c]='1';}
	  else if ( analyze[c][0]){new_cache[26][a]=1;new_cache[c][a]-=analyze[c][0];fprintf ( stdout, "0");res_type[c]='0';}
	  else if ( analyze[c][1]){new_cache[26][a]=1;new_cache[c][a]+=analyze[c][1];fprintf ( stdout, "1");res_type[c]='1';}
	  else if ( !analyze[c][0] &&!analyze[c][1]){fprintf ( stdout, "-");res_type[c]='-';}
	}

      /* sub[class][0]/sub[class][1]: summed matrix score / number of residue
	 pairs of the same class; 'm' pools every pair of observed residues */
      for ( c=0; c<26; c++)
	{
	  for ( d=0; d<26; d++)
	    {
	      if ( res_type[c]==res_type[d])
		{
		  sub[res_type[c]][0]+=mat[c][d];
		  sub[res_type[c]][1]++;
		}
	      if ( res_type[c]!='-' && res_type[d]!='-')
		{
		  sub['m'][0]+=mat[c][d];
		  sub['m'][1]++;
		}
	    }
	}
      for ( c=0; c< 256; c++)
	{
	  if ( sub[c][1])fprintf ( stdout, " %c: %5.2f ", c, (float)sub[c][0]/(float)sub[c][1]);
	}
      fprintf ( stdout, " SC: %d\nRSPRED  ", cache[26][a]);

      /* keep[d] counts the class-'1' residues with a positive matrix score
	 against d; class-'1' residues themselves are forced to 9 */
      for ( c=0; c<26; c++)
	if ( res_type[c]=='1')
	  {
	    for (d=0; d<26; d++)
	      if (mat[c][d]>0)keep[d]++;
	    keep[c]=9;
	  }

      for (c=0; c<26; c++)
	{
	  if ( keep[c]>10)fprintf ( stdout, "9");
	  else fprintf ( stdout, "%d", keep[c]);
	}
      for ( c=0; c<26; c++)
	{
	  if ( keep[c]>8)new_cache[c][a]=10;
	  else new_cache[c][a]=-10;
	}
      fprintf ( stdout, "\n");
      free_int (analyze, -1);
      free_int (sub, -1);
      vfree (res_type);
      vfree (keep);
    }
  for ( a=0; a<25; a++)
    for (b=a+1; b<26; b++)
      {
	int r1, r2;
	r1=a+'a';r2=b+'a';
	if ( strchr("bjoxz", r1))continue;
	if ( strchr("bjoxz",r2))continue;

	if ( mat[a][b]>0 && a!=b)fprintf ( stdout, "\nMATANALYZE %c %c %d", a+'a', b+'a', mat[a][b]);
      }

  for (s1=0; s1<ns[1]; s1++)
    {
      int res, real;

      seq1=ls[1][s1];
      real=(S->seq_al[ci][seq1]=='I')?1:0;
      fprintf ( stdout, ">%-10s %d ", A->name[seq1], real);
      for (score=0,b=0; b<A->len_aln; b++)
	{
	  res=tolower (A->seq_al[seq1][b]);
	  if ( cache[26][b]==0) continue;
	  if ( !is_gap(res))
	    {
	      score+=new_cache[res-'a'][b];
	    }
	  fprintf ( stdout, "%c", res);
	}
      fprintf ( stdout, " SCORE: %5d SPRED\n", (int)score);
      list[s1][0]=real;
      list[s1][1]=(int)score;
    }
  free_int (new_cache, -1);
  sort_int_inv (list, 2, 1, 0, ns[1]-1);

  /* n = number of real positives; keep the ranked prefix whose size is
     closest to n. */
  for (n=0, a=0; a<ns[1]; a++)n+=list[a][0];
  for (best_delta=100000,best_tp=0,tp=0,fp=0,best_fp=0,a=0; a<ns[1]; a++)
    {
      tp+=list[a][0];
      fp+=1-list[a][0];
      delta=(n-(tp+fp));
      if (FABS(delta)<best_delta)
	{
	  /* store the magnitude: it is what the test above compares against */
	  best_delta=FABS(delta);
	  best_tp=tp;
	  best_fp=fp;
	}
    }

  /*R[pred][real]*/
  tp=best_tp;
  fp=best_fp;
  fn=n-tp;
  tn=ns[1]-(tp+fp+fn);
  R[1][1]=tp;
  R[1][0]=fp;
  R[0][1]=fn;
  R[0][0]=tn;
  free_int (list, -1);

  return R;
}

/*
 * sar2cache_proba_old
 *
 * Builds cache[seq][col] (A->nseq x A->len_aln) from the training sequences
 * ls[0][0..ns[0]-1].
 *
 * For every training sequence mseq flagged 'I' for the compound, each non-gap
 * column gets a weighted contingency table against the other training
 * sequences (same residue as mseq or not, 'I' or 'O') and a score from
 * evaluate_sar_score1(N, N11, N1msa, N1sar). The _MAXN1_ best columns whose
 * score lies strictly between _FILTER1_ and _FILTER2_ are stored in
 * cache[mseq][col].
 *
 * Columns are then filtered on their summed score: outside
 * [_FILTER3_, _FILTER4_] they are cleared for non-gap residues; then either a
 * z-score threshold (_ZSCORE_ != 0) or a "keep the _MAXN2_ best" rule is
 * applied; finally the first _MAXN3_ entries of the column list are cleared.
 * In the z-score branch the column list is not sorted, so _MAXN3_ then acts
 * on the first columns of the alignment.
 *
 * Other mode keys read: _SIMWEIGHT_, _SWTHR_, _TRAIN_, _QUANT_, _WEIGHT_
 * (parsed, not used).
 *
 * The similarity matrix is built on first use and kept in a static pointer.
 * Returns the cache; the caller owns it.
 *
 * NOTE(review): sort_int_inv / sort_int are presumed to sort rows in
 * decreasing / increasing order of the given field - confirm.
 */
int **sar2cache_proba_old ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode)
{
  int col, s, seq,ms,mseq, res, mres, n,maxn1, maxn2,maxn3, ci, a, top;
  float quant=0;
  int **list;

  int N1msa,N1sar, N, N11, N10, N01,N00, SCORE, COL_INDEX, RES;
  int nfield=0;
  int value;
  float T1, T2, T3, T4;
  int weight_mode;
  int **cache;
  static int **sim;
  int sim_weight, w, sw_thr;
  int train_mode;

  float zscore;

  /* field indexes inside a row of list */
  RES=nfield++;COL_INDEX=nfield++;N1msa=nfield++;N1sar=nfield++;N=nfield++;N11=nfield++;N10=nfield++;N01=nfield++;N00=nfield++;SCORE=nfield++;
  ci=name_is_in_list ( compound, S->name, S->nseq, -1);
  cache=declare_int (A->nseq, A->len_aln);

  strget_param ( mode, "_FILTER1_", "0"   , "%f", &T1);
  strget_param ( mode, "_FILTER2_", "1000000", "%f", &T2);
  strget_param ( mode, "_FILTER3_", "0"   , "%f", &T3);
  strget_param ( mode, "_FILTER4_", "1000000", "%f", &T4);
  strget_param ( mode, "_SIMWEIGHT_", "1", "%d", &sim_weight);
  strget_param ( mode, "_SWTHR_", "30", "%d", &sw_thr);
  strget_param (mode, "_TRAIN_","1", "%d", &train_mode);
  strget_param (mode, "_ZSCORE_","0", "%f", &zscore);

  if (sim_weight==1 && !sim) sim=aln2sim_mat(A, "idmat");
  for ( ms=0; ms<ns[0]; ms++)
    {
      mseq=ls[0][ms];
      if ( S->seq_al[ci][mseq]!='I')continue;

      list=declare_int (A->len_aln+1, nfield);
      for (col=0; col< A->len_aln; col++)
	{
	  int same_res;

	  mres=tolower(A->seq_al[mseq][col]);
	  list[col][RES]=mres;
	  list[col][COL_INDEX]=col;

	  if ( is_gap(mres))continue;
	  for ( s=0; s<ns[0]; s++)
	    {
	      seq=ls[0][s];
	      res=tolower(A->seq_al[seq][col]);
	      if (is_gap(res))continue;

	      if (sim_weight==1)
		{
		  w=sim[seq][mseq];w=(mres==res)?100-w:w;
		  if (w<sw_thr)w=0;
		}
	      else
		w=1;

	      if ( train_mode==4)
		{
		  if ( S->seq_al[ci][seq]=='I')same_res=1;
		  else same_res=(res==mres)?1:0;
		}
	      else
		same_res=(res==mres)?1:0;

	      list[col][N]+=w;

	      /* N<sar><same residue>: the "different residue" cells repeated
		 the "same residue" test and could never be reached */
	      if (S->seq_al[ci][seq]=='I' && same_res)list[col][N11]+=w;
	      else if (S->seq_al[ci][seq]=='I' && !same_res)list[col][N10]+=w;
	      else if (S->seq_al[ci][seq]=='O' && same_res)list[col][N01]+=w;
	      else if (S->seq_al[ci][seq]=='O' && !same_res)list[col][N00]+=w;

	      if ( S->seq_al[ci][seq]=='I')list[col][N1sar]+=w;
	      if ( same_res)list[col][N1msa]+=w;
	    }

	  list[col][SCORE]=(int)evaluate_sar_score1 (list[col][N], list[col][N11], list[col][N1msa], list[col][N1sar]);
	}

      /* re-read on every pass because maxn1 may be lowered just below */
      strget_param ( mode, "_MAXN1_", "5", "%d", &maxn1);
      strget_param ( mode, "_WEIGHT_", "1", "%d", &weight_mode);
      strget_param ( mode, "_QUANT_", "0.0", "%f", &quant);

      sort_int_inv (list,nfield,SCORE,0,A->len_aln-1);
      if ( quant !=0)
	{
	  n=quantile_rank ( list,SCORE, A->len_aln,quant);
	  sort_int (list,nfield,N1msa, 0, n-1);
	  maxn1=MIN(n,maxn1);
	}

      /* never walk past the columns that were actually scored */
      top=MIN(maxn1, A->len_aln);
      for (a=0; a<top; a++)
	{
	  col=list[a][COL_INDEX];
	  value=list[a][SCORE];
	  if ( value>T1 && value<T2){cache[mseq][col]= value;}
	}
      free_int (list, -1);
    }

  /*Filter Columns*/
  list=declare_int (A->len_aln+1, nfield);
  for ( col=0; col< A->len_aln; col++)
    {
      list[col][COL_INDEX]=col;
      for ( s=0; s<ns[0]; s++)
	{
	  seq=ls[0][s];
	  list[col][SCORE]+=cache[seq][col];
	}
    }

  /*Filter Columns with a score not between T3 and T4*/

  for (col=0; col< A->len_aln; col++)
    if (list[col][SCORE]<T3 || list[col][SCORE]>T4)
      {
	list[col][SCORE]=0;
	for (s=0; s< A->nseq; s++)
	  if (!is_gap(A->seq_al[s][col]))cache[s][col]=0;
      }

  /*Keep The N Best Columns*/
  if ( zscore!=0)
    {
      double sum=0, sum2=0, z;
      int nz=0;
      for (a=0; a< A->len_aln; a++)
	{
	  if ( list[a][SCORE]>0)
	    {
	      sum+=list[a][SCORE];
	      /* squared in double so large scores cannot overflow an int */
	      sum2+=(double)list[a][SCORE]*list[a][SCORE];
	      nz++;
	    }
	}
      for (a=0; a<A->len_aln; a++)
	{
	  if ( list[a][SCORE]>0)
	    {
	      z=return_z_score (list[a][SCORE], sum, sum2,nz);
	      if ((float)z<zscore)
		{
		  col=list[a][COL_INDEX];
		  for (s=0; s<A->nseq; s++)
		    cache [s][col]=0;
		}
	      else
		{
		  fprintf ( stdout, "\nZSCORE: KEEP COL %d SCORE: %f SCORE: %d\n", list[a][COL_INDEX], (float)z, list[a][SCORE]);
		}
	    }
	}
    }
  else
    {
      sort_int_inv (list,nfield,SCORE,0,A->len_aln-1);
      strget_param ( mode, "_MAXN2_", "100000", "%d", &maxn2);

      /* a negative _MAXN2_ clears every column instead of indexing list[-k] */
      for (a=(maxn2<0)?0:maxn2;a<A->len_aln; a++)
	{
	  col=list[a][COL_INDEX];
	  for (s=0; s<A->nseq; s++)
	    cache [s][col]=0;
	}
    }

  /*Get Rid of the N best Columns*/
  strget_param ( mode, "_MAXN3_", "0", "%d", &maxn3);

  top=MIN(maxn3, A->len_aln);
  for (a=0; a<top;a++)
    {
      col=list[a][COL_INDEX];
      for (s=0; s<A->nseq; s++)
	cache [s][col]=0;
    }

  /* the column list used to be leaked */
  free_int (list, -1);
  return cache;
}
/*
 * aln2n_comp_col
 *
 * Counts, over all columns of A, the (column, residue) pairs such that the
 * residue is carried by every sequence flagged 'I' in S->seq_al[ci] and by
 * fewer than all of the other sequences. Only '-' is treated as a gap here.
 * Returns that count.
 */
int aln2n_comp_col ( Alignment *A, Alignment *S, int ci)
{
  int **tally;
  int total=0;
  int col, seq, r;

  /* tally[residue][sar]: residue counts per class in the current column */
  tally=declare_int (27, 2);

  for ( col=0; col< A->len_aln; col++)
    {
      int n_in=0;
      int n_out=0;

      for ( seq=0; seq<A->nseq; seq++)
	{
	  int aa=tolower(A->seq_al[seq][col]);
	  int is_in=(S->seq_al[ci][seq]=='I')?1:0;

	  if (is_in)n_in++;
	  else n_out++;

	  if ( aa!='-')
	    {
	      tally[aa-'a'][is_in]++;
	    }
	}

      for (r=0; r<26; r++)
	{
	  if ( tally[r][1]==n_in && tally[r][0]<n_out)
	    {
	      total++;
	    }
	  /* clear the slot for the next column */
	  tally[r][0]=tally[r][1]=0;
	}
    }

  free_int (tally, -1);
  return total;
}
/*
 * sar2cache_count1
 *
 * Builds a 27 x A->len_aln cache from the training sequences ls[0][*].
 * For each column and residue r, with a1/a0 the number of 'I'/other training
 * sequences carrying r and n0 the number of non-'I' training sequences:
 *   - if mode contains "SIMTEST": weight = a1;
 *   - otherwise: weight = n0-a0 when a1 is non-zero, else 0.
 * cache[r][col] receives the weight and cache[26][col] the largest weight of
 * the column. Columns ranked from _MAXN2_ onwards (on cache[26]) are cleared.
 * Only '-' is treated as a gap here.
 *
 * The substitution matrix is loaded on the first call but not used by the
 * computation. Returns the cache; the caller owns it.
 *
 * NOTE(review): sort_int_inv is presumed to sort rows in decreasing order of
 * the given field - confirm against its definition.
 */
int **sar2cache_count1 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode)
{
  static int **mat;
  int **tally, **rank, **cache;
  int ci, col, k, r, keep;

  if (!mat)
    {
      mat=read_matrice ("blosum62mt");
    }

  rank=declare_int ( A->len_aln, 2);
  cache=declare_int ( 27, A->len_aln);
  tally=declare_int (27, 2);

  ci=name_is_in_list ( compound, S->name, S->nseq, -1);

  for ( col=0; col< A->len_aln; col++)
    {
      int n_out=0;

      /* tally[residue][sar] for this column */
      for ( k=0; k<ns[0]; k++)
	{
	  int seq=ls[0][k];
	  int aa=tolower(A->seq_al[seq][col]);
	  int is_in=(S->seq_al[ci][seq]=='I')?1:0;

	  if (!is_in)n_out++;
	  if ( aa!='-')
	    {
	      tally[aa-'a'][is_in]++;
	    }
	}

      for (r=0; r<26; r++)
	{
	  int weight=0;

	  if ( strstr (mode, "SIMTEST"))
	    {
	      weight=tally[r][1];
	    }
	  else if ( tally[r][1])
	    {
	      weight=n_out-tally[r][0];
	    }

	  cache[r][col]+=weight;
	  cache[26][col]=MAX(weight, cache[26][col]);

	  /* clear the slot for the next column */
	  tally[r][0]=tally[r][1]=0;
	}

      rank[col][0]=col;
      rank[col][1]=cache[26][col];
    }

  free_int (tally, -1);

  sort_int_inv (rank, 2, 1, 0, A->len_aln-1);

  strget_param ( mode, "_MAXN2_", "100000", "%d", &keep);

  /* drop every column ranked at or beyond position keep */
  col=keep;
  while (col<A->len_aln)
    {
      for ( r=0; r<=26; r++)
	{
	  cache[r][rank[col][0]]=0;
	}
      col++;
    }

  free_int (rank, -1);
  return cache;
}


/*
 * sar2cache_count2
 *
 * Builds a 27 x A->len_aln cache from the training sequences ls[0][*].
 *
 * Pass 1: for each column, a residue carried by every 'I' training sequence
 * and by fewer than all of the other ones becomes the column "consensus"
 * (conseq[col][0]) with weight n0-a0 (conseq[col][1]); if several residues
 * qualify, the last one wins.
 * Pass 2: for each training sequence, w2 = number of consensus columns where
 * it is not a gap and w1 = number of those where it differs from the
 * consensus; (w2-w1) is added to conseq[col][2] of every consensus column
 * where the sequence differs.
 * Pass 3: conseq[col][2] is written to cache[consensus][col] and
 * cache[26][col]; columns ranked from _MAXN2_ onwards are cleared.
 *
 * The substitution matrix is loaded on the first call but not used by the
 * computation. Returns the cache; the caller owns it.
 *
 * NOTE(review): sort_int_inv is presumed to sort rows in decreasing order of
 * the given field - confirm against its definition.
 */
int **sar2cache_count2 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode)
{
  int maxn2, res, seq,sar, ci, col,s, r;
  int **analyze, **list, **cache, **conseq;
  static int **mat;
  int w=0;
  if (!mat) mat=read_matrice ("blosum62mt");

  list=declare_int ( A->len_aln, 2);
  cache=declare_int ( 27, A->len_aln);
  conseq=declare_int ( A->len_aln,3);

  analyze=declare_int (27, 2);

  ci=name_is_in_list ( compound, S->name, S->nseq, -1);
  for ( col=0; col< A->len_aln; col++)
    {
      int n1, n0;

      for ( n1=0, n0=0,s=0; s<ns[0]; s++)
	{
	  seq=ls[0][s];
	  res=tolower(A->seq_al[seq][col]);
	  sar=(S->seq_al[ci][seq]=='I')?1:0;
	  n1+=(sar==1)?1:0;
	  n0+=(sar==0)?1:0;
	  if ( res=='-')continue;
	  res-='a';
	  analyze[res][sar]++;
	}
      for (r=0; r<26; r++)
	{
	  int a0,a1;
	  a0=analyze[r][0];
	  a1=analyze[r][1];
	  if ( a1==n1 && a0<n0)
	    {
	      w=n0-a0;
	      conseq[col][0]=r;
	      conseq[col][1]=w;
	    }
	}
      for ( r=0; r<26; r++)analyze[r][0]=analyze[r][1]=0;
    }
  free_int (analyze, -1);

  for (s=0; s<ns[0]; s++)
    {
      int w1, w2;
      seq=ls[0][s];
      for (w1=0,w2=0,col=0; col<A->len_aln; col++)
	{
	  res=tolower(A->seq_al[seq][col]);
	  if ( is_gap(res))continue;
	  else res-='a';

	  if ( conseq[col][1] && res!=conseq[col][0])w1++;
	  if ( conseq[col][1])w2++;
	}
      for (col=0; col<A->len_aln; col++)
	{
	  res=tolower(A->seq_al[seq][col]);
	  if ( is_gap(res))continue;
	  else res-='a';

	  if ( conseq[col][1] && res!=conseq[col][0])conseq[col][2]+=(w2-w1);
	}
    }

  for (col=0; col<A->len_aln; col++)
    {
      r=conseq[col][0];
      w=conseq[col][2];

      cache[r][col]=cache[26][col]=list[col][1]=w;
      list[col][0]=col;
    }
  /* the consensus table is no longer needed; it used to be leaked */
  free_int (conseq, -1);

  sort_int_inv (list, 2, 1, 0, A->len_aln-1);
  strget_param ( mode, "_MAXN2_", "100000", "%d", &maxn2);

  for ( col=maxn2; col<A->len_aln; col++)
    for ( r=0; r<=26; r++)cache[r][list[col][0]]=0;

  free_int (list, -1);
  return cache;
}

/*
 * sar2cache_count3
 *
 * Builds a 27 x A->len_aln cache from the training sequences ls[0][*].
 * Columns whose gap percentage over all sequences of A exceeds 10 are left
 * empty. For the other columns, with a1/a0 the number of 'I'/other training
 * sequences carrying residue r:
 *   - if mode contains "SIMTEST": cache[r][col]=a1 when a1 is non-zero;
 *   - otherwise: cache[r][col]=a0 when the residue is seen only in non-'I'
 *     sequences (a1==0 and a0!=0).
 * cache[26][col] holds the largest value of the column. Columns ranked from
 * _MAXN2_ onwards (on cache[26]) are cleared.
 *
 * An entropy filter existed here but is disabled (the entropy was forced
 * to 0), so only the gap filter is applied.
 *
 * The substitution matrix is loaded on the first call but not used by the
 * computation. Returns the cache; the caller owns it.
 *
 * NOTE(review): sort_int_inv is presumed to sort rows in decreasing order of
 * the given field - confirm against its definition.
 */
int **sar2cache_count3 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode)
{
  int  maxn2, res, seq,sar, ci, col,s, r, a1, a0;
  int **analyze, **list, **cache;
  static int **mat;

  if (!mat) mat=read_matrice ("blosum62mt");

  list=declare_int ( A->len_aln, 2);
  cache=declare_int ( 27, A->len_aln);
  analyze=declare_int (27, 2);

  ci=name_is_in_list ( compound, S->name, S->nseq, -1);

  for ( col=0; col< A->len_aln; col++)
    {
      double g;

      /* analyze[residue][sar]: residue counts per class in this column */
      for ( s=0; s<ns[0]; s++)
	{
	  seq=ls[0][s];
	  res=tolower(A->seq_al[seq][col]);
	  sar=(S->seq_al[ci][seq]=='I')?1:0;
	  if ( res=='-')continue;
	  res-='a';

	  analyze[res][sar]++;
	}

      /*Gap*/
      for (g=0,r=0; r<A->nseq; r++)
	g+=is_gap(A->seq_al[r][col]);
      g=(100*g)/A->nseq;

      /* Gappy columns are skipped, but the bookkeeping at the bottom of the
	 loop must still run: jumping over it let the counts of this column
	 leak into the next one and left list[col][0] pointing at column 0. */
      if (!(g>10))
	{
	  if ( strstr ( mode, "SIMTEST"))
	    {
	      for (r=0; r<26; r++)
		{
		  a1=analyze[r][1];

		  if (a1)
		    {
		      cache[r][col]=a1;
		      cache[26][col]=MAX(cache[26][col],a1);
		    }
		}
	    }
	  else
	    {
	      for (r=0; r<26; r++)
		{
		  a0=analyze[r][0];
		  a1=analyze[r][1];

		  if (!a1 && a0)
		    {
		      cache[r][col]=a0;
		      cache[26][col]=MAX(cache[26][col],a0);
		    }
		}
	    }
	}

      for ( r=0; r<26; r++)analyze[r][0]=analyze[r][1]=0;
      list[col][0]=col;
      list[col][1]=cache[26][col];
    }

  free_int (analyze, -1);

  sort_int_inv (list, 2, 1, 0, A->len_aln-1);

  strget_param ( mode, "_MAXN2_", "100000", "%d", &maxn2);

  for ( col=maxn2; col<A->len_aln; col++)
    for ( r=0; r<=26; r++)cache[r][list[col][0]]=0;

  free_int (list, -1);
  return cache;
}


/*
 * sar2cache_proba_new
 *
 * Builds a 27 x A->len_aln cache from the training sequences ls[0][*].
 *
 * For every training sequence mseq flagged 'I' for the compound, each non-gap
 * column gets a similarity-weighted contingency table against the other
 * training sequences and a score from
 * evaluate_sar_score1(N, N11, N1msa, N1sar). The scores of the _MAXN1_ best
 * columns are added to cache[residue of mseq][col] and to cache[26][col].
 *
 * Columns are then filtered on cache[26]: either with a z-score threshold
 * (_ZSCORE_ != 0) or by keeping the _MAXN2_ best ones; finally the first
 * _MAXN3_ entries of the column list are cleared. In the z-score branch the
 * column list is not sorted, so _MAXN3_ then acts on the first columns of the
 * alignment.
 *
 * The similarity matrix is built on the first call and kept in a static
 * pointer. Returns the cache; the caller owns it.
 *
 * NOTE(review): sort_int_inv is presumed to sort rows in decreasing order of
 * the given field - confirm against its definition.
 */
int **sar2cache_proba_new ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode)
{
  int col, s, seq,ms,mseq, res, mres, res1, maxn1, maxn2,maxn3, ci, a,w, top;

  int **list;

  int N1msa,N1sar, N, N11, N10, N01,N00, SCORE, COL_INDEX, RES;
  int nfield=0;
  int value;

  int **cache;
  static int **sim;
  int sw_thr;
  float zscore;

  /* field indexes inside a row of list */
  RES=nfield++;COL_INDEX=nfield++;N1msa=nfield++;N1sar=nfield++;N=nfield++;N11=nfield++;N10=nfield++;N01=nfield++;N00=nfield++;SCORE=nfield++;
  ci=name_is_in_list ( compound, S->name, S->nseq, -1);
  cache=declare_int (27, A->len_aln);

  strget_param ( mode, "_SWTHR_", "30", "%d", &sw_thr);
  strget_param (mode, "_ZSCORE_","0", "%f", &zscore);

  if (!sim)sim=aln2sim_mat(A, "idmat");
  for ( ms=0; ms<ns[0]; ms++)
    {
      mseq=ls[0][ms];
      if ( S->seq_al[ci][mseq]!='I')continue;

      list=declare_int (A->len_aln+1, nfield);
      for (col=0; col< A->len_aln; col++)
	{
	  int same_res;

	  mres=tolower(A->seq_al[mseq][col]);
	  /* gap columns keep an all-zero row: RES==0 marks them below */
	  if ( is_gap(mres))continue;

	  list[col][RES]=mres;
	  list[col][COL_INDEX]=col;

	  for ( s=0; s<ns[0]; s++)
	    {
	      seq=ls[0][s];
	      res=tolower(A->seq_al[seq][col]);
	      if (is_gap(res))continue;
	      w=sim[seq][mseq];w=(mres==res)?100-w:w;
	      if (w<sw_thr)w=0;
	      same_res=(res==mres)?1:0;

	      list[col][N]+=w;

	      /* N<sar><same residue>: the "different residue" cells repeated
		 the "same residue" test and could never be reached */
	      if (S->seq_al[ci][seq]=='I' && same_res)list[col][N11]+=w;
	      else if (S->seq_al[ci][seq]=='I' && !same_res)list[col][N10]+=w;
	      else if (S->seq_al[ci][seq]=='O' && same_res)list[col][N01]+=w;
	      else if (S->seq_al[ci][seq]=='O' && !same_res)list[col][N00]+=w;

	      if ( S->seq_al[ci][seq]=='I')list[col][N1sar]+=w;
	      if ( same_res)list[col][N1msa]+=w;
	    }

	  list[col][SCORE]=(int)evaluate_sar_score1 (list[col][N], list[col][N11], list[col][N1msa], list[col][N1sar]);
	}
      strget_param ( mode, "_MAXN1_", "5", "%d", &maxn1);
      sort_int_inv (list,nfield,SCORE,0,A->len_aln-1);

      /* never walk past the columns that were actually scored */
      top=maxn1;
      if (top>A->len_aln)top=A->len_aln;
      for (a=0; a<top; a++)
	{
	  col=list[a][COL_INDEX];
	  res1=list[a][RES];
	  value=list[a][SCORE];

	  if ( res1!=0)
	    {
	      cache[res1-'a'][col]+= value;
	      cache[26][col]+=value;
	    }
	}
      free_int (list, -1);
    }

  /*Filter Columns*/
  list=declare_int (A->len_aln+1, nfield);
  for ( col=0; col< A->len_aln; col++)
    {
      list[col][COL_INDEX]=col;
      list[col][SCORE]=cache[26][col];
    }
  /*Keep The N Best Columns*/
  if ( zscore!=0)
    {
      double sum=0, sum2=0, z;
      int nz=0;
      for (a=0; a< A->len_aln; a++)
	{
	  if ( list[a][SCORE]>0)
	    {
	      sum+=list[a][SCORE];
	      /* squared in double so large scores cannot overflow an int */
	      sum2+=(double)list[a][SCORE]*list[a][SCORE];
	      nz++;
	    }
	}
      for (a=0; a<A->len_aln; a++)
	{
	  if ( list[a][SCORE]>0)
	    {
	      z=return_z_score (list[a][SCORE], sum, sum2,nz);
	      if ((float)z<zscore)
		{
		  col=list[a][COL_INDEX];
		  for (s=0; s<27; s++)
		    cache [s][col]=0;
		}
	      else
		{
		  fprintf ( stdout, "\nZSCORE: KEEP COL %d SCORE: %f SCORE: %d\n", list[a][COL_INDEX], (float)z, list[a][SCORE]);
		}
	    }
	}
    }
  else
    {
      sort_int_inv (list,nfield,SCORE,0,A->len_aln-1);
      strget_param ( mode, "_MAXN2_", "100000", "%d", &maxn2);

      /* a negative _MAXN2_ clears every column instead of indexing list[-k] */
      for (a=(maxn2<0)?0:maxn2;a<A->len_aln; a++)
	{
	  col=list[a][COL_INDEX];
	  for (s=0; s<27; s++)
	    cache [s][col]=0;
	}
    }

  /*Get Rid of the N best Columns*/
  strget_param ( mode, "_MAXN3_", "0", "%d", &maxn3);

  top=maxn3;
  if (top>A->len_aln)top=A->len_aln;
  for (a=0; a<top;a++)
    {
      col=list[a][COL_INDEX];
      for (s=0; s<27; s++)
	cache [s][col]=0;
    }

  /* the column list used to be leaked */
  free_int (list, -1);
  return cache;
}
/*
 * sar2cache_adriana
 *
 * Builds cache[seq][col] (A->nseq x A->len_aln) from the training sequences
 * ls[0][*]. For every training sequence mseq flagged 'I' for the compound and
 * every non-gap column, it counts the training sequences sharing mseq's
 * residue (field 2) and those among them flagged 'I' (field 3). Columns are
 * sorted on field 3, the first n rows (n from quantile_rank with _QUANT_) are
 * re-sorted on field 2, and field 3 of the first _MAXN1_ rows is stored in
 * cache[mseq][col].
 *
 * Returns the cache; the caller owns it.
 *
 * NOTE(review): sort_int_inv / sort_int are presumed to sort rows in
 * decreasing / increasing order of the given field - confirm.
 */
int **sar2cache_adriana ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode)
{
  int col,maxn1, s, seq,ms,mseq, res, mres, n, ci, a, top;
  float quant=0;
  int **list;

  int **cache;
  ci=name_is_in_list ( compound, S->name, S->nseq, -1);
  cache=declare_int (A->nseq, A->len_aln);

  for ( ms=0; ms<ns[0]; ms++)
    {
      mseq=ls[0][ms];
      if ( S->seq_al[ci][mseq]!='I')continue;

      /* row layout: 0 residue, 1 column, 2 same-residue count,
	 3 same-residue count among 'I' sequences */
      list=declare_int (A->len_aln+1, 5);
      for (col=0; col< A->len_aln; col++)
	{
	  mres=tolower(A->seq_al[mseq][col]);
	  list[col][0]=mres;
	  list[col][1]=col;

	  if ( is_gap(mres))continue;
	  for ( s=0; s<ns[0]; s++)
	    {
	      seq=ls[0][s];
	      res=tolower(A->seq_al[seq][col]);
	      if (is_gap(res))continue;

	      if (S->seq_al[ci][seq]=='I' && res==mres)list[col][3]++;
	      if (res==mres)list[col][2]++;
	    }
	}

      sort_int_inv (list,5,3,0,A->len_aln-1);

      strget_param ( mode, "_MAXN1_", "5", "%d", &maxn1);
      strget_param ( mode, "_QUANT_", "0.95", "%f", &quant);

      n=quantile_rank ( list, 3, A->len_aln,quant);
      sort_int (list, 5, 2, 0, n-1);

      /* never read past the columns of the alignment when _MAXN1_ is larger */
      top=maxn1;
      if (top>A->len_aln)top=A->len_aln;
      for (a=0; a<top; a++)
	{
	  col=list[a][1];
	  cache[mseq][col]=list[a][3];
	}
      free_int (list, -1);
    }
  return cache;
}
/*
 * Builds a cache[A->nseq][A->len_aln] for one compound from per-column
 * scores returned by evaluate_sar_score1.
 *
 * list has one row per column with A->nseq+2 fields:
 *   [0..nseq-1] score of the column for each 'I' sequence of the subset
 *   [SCORE]     sum of those scores
 *   [COL]       column index (needed because the rows get sorted)
 * Columns are ranked on SCORE and the per-sequence scores of the first n
 * ranked columns that satisfy T1 < value < T2 are copied into the cache.
 *
 * Returns the cache; it is left as declared when COMPOUND is not found in S.
 * NOTE(review): _FILTER3_, _FILTER4_ and _MAXN1_ are parsed but not used,
 * and the rank returned by quantile_rank is overridden by the constant 5,
 * exactly as in the previous implementation.
 */
int **sar2cache_proba2 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode)
{
  int col, s, seq,ms,mseq, res, mres,n,maxn1, ci, a,b;
  int COL, SCORE;

  float quant=0;
  int **list;

  float T1, T2, T3, T4;

  int **cache;
  cache=declare_int ( A->nseq, A->len_aln);
  ci=name_is_in_list ( compound, S->name, S->nseq, -1);

  /* A missing name is reported as -1 by the other callers in this file;
     indexing S->seq_al with it would read out of bounds. */
  if ( ci==-1)return cache;

  strget_param ( mode, "_FILTER1_", "0"   , "%f", &T1);
  strget_param ( mode, "_FILTER2_", "1000000", "%f", &T2);
  strget_param ( mode, "_FILTER3_", "0"   , "%f", &T3);
  strget_param ( mode, "_FILTER4_", "1000000", "%f", &T4);

  list=declare_int (A->len_aln+1,A->nseq+2);
  SCORE=A->nseq;
  COL=A->nseq+1;

  for ( ms=0; ms<ns[0]; ms++)
    {
      mseq=ls[0][ms];
      if ( S->seq_al[ci][mseq]!='I')continue;

      for (col=0; col< A->len_aln; col++)
        {
          int N11=0,N1sar=0,N1msa=0,N=0;

          mres=tolower(A->seq_al[mseq][col]);
          if ( is_gap(mres))continue;
          for ( s=0; s<ns[0]; s++)
            {
              seq=ls[0][s];
              res=tolower(A->seq_al[seq][col]);
              if (is_gap(res))continue;

              N++;
              if ( S->seq_al[ci][seq]=='I')
                {
                  N1sar++;
                  if ( res==mres)N11++;
                }
              if ( res==mres)N1msa++;
            }
          list[col][mseq]=(int)evaluate_sar_score1 (N,N11,N1msa,N1sar);
          list[col][SCORE]+=list[col][mseq];
          list[col][COL]=col;
        }
    }

  strget_param ( mode, "_MAXN1_", "5", "%d", &maxn1);
  strget_param ( mode, "_QUANT_", "0.95", "%f", &quant);
  sort_int_inv (list,A->nseq+2,SCORE, 0, A->len_aln-1);
  n=quantile_rank ( list,A->nseq, A->len_aln,quant);
  n=5;
  /* Only A->len_aln rows were ranked: never walk past them */
  if ( n>A->len_aln)n=A->len_aln;

  for (a=0; a<n; a++)
    {
      int value;

      col=list[a][COL];
      for ( b=0; b<A->nseq; b++)
        {
          /* Rows were permuted by the sort: the scores of column `col`
             live in the row of rank a, not in row number `col`. */
          value=list[a][b];
          if ( value>T1 && value<T2){cache[b][col]= value;}
        }
    }

  free_int (list, -1);
  return cache;
}

  
        

/************************************************************************************/
/*                ALIGNMENT ANALYZE     : SAR                                            */
/************************************************************************************/
/*
 * Splits the sequences of A into two jackknife groups, balancing the
 * sequences flagged 'I' in comp[] and the remaining ones separately.
 *
 * Every sequence receives a random key (rand()%100000 after vsrand(0));
 * each class is sorted on that key, its first half goes to l1[0] and the
 * rest to l2[0]. l1[0]/l2[0] are allocated here (A->nseq ints each) and
 * nl1[0]/nl2[0] receive the group sizes. Always returns 1.
 */
int aln2jack_group3 (Alignment *A,char *comp, int **l1, int *nl1, int **l2, int *nl2)
{
  int **active, **others;
  int n_active=0, n_others=0;
  int i, half;

  vsrand (0);
  active=declare_int (A->nseq, 2);
  others=declare_int (A->nseq, 2);

  /* field 0: sequence index, field 1: random sort key */
  for (i=0; i<A->nseq; i++)
    {
      if (comp[i]=='I')
        {
          active[n_active][0]=i;
          active[n_active][1]=rand()%100000;
          n_active++;
        }
      else
        {
          others[n_others][0]=i;
          others[n_others][1]=rand()%100000;
          n_others++;
        }
    }

  l1[0]=vcalloc (A->nseq, sizeof (int));
  l2[0]=vcalloc (A->nseq, sizeof (int));
  nl1[0]=0;
  nl2[0]=0;

  sort_int (others, 2, 1, 0,n_others-1);
  sort_int (active, 2, 1, 0,n_active-1);

  half=n_active/2;
  for (i=0; i<n_active; i++)
    {
      if (i<half)
        {
          l1[0][nl1[0]]=active[i][0];
          nl1[0]++;
        }
      else
        {
          l2[0][nl2[0]]=active[i][0];
          nl2[0]++;
        }
    }

  half=n_others/2;
  for (i=0; i<n_others; i++)
    {
      if (i<half)
        {
          l1[0][nl1[0]]=others[i][0];
          nl1[0]++;
        }
      else
        {
          l2[0][nl2[0]]=others[i][0];
          nl2[0]++;
        }
    }

  free_int (others, -1);
  free_int (active, -1);
  return 1;
}

/*
 * Splits the sequences of A into two random halves.
 *
 * Every sequence receives a random key (rand()%100000 after vsrand(0));
 * after sorting on that key the first A->nseq/2 indices go to l1[0] and
 * the rest to l2[0]. Both lists are allocated here (A->nseq ints each) and
 * nl1[0]/nl2[0] receive their sizes. `seq` is not used. Always returns 1.
 */
int aln2jack_group2 (Alignment *A, int seq, int **l1, int *nl1, int **l2, int *nl2)
{
  int **list;
  int a, mid;

  (void)seq;

  list=declare_int (A->nseq, 2);
  l1[0]=vcalloc (A->nseq, sizeof (int));
  l2[0]=vcalloc (A->nseq, sizeof (int));
  /* Both counters must start from 0: they are used as write cursors below
     (the former `nl1[0]=nl2[0];` left them at whatever the caller passed). */
  nl1[0]=nl2[0]=0;

  vsrand (0);
  for ( a=0; a< A->nseq; a++)
    {
      list[a][0]=a;
      list[a][1]=rand()%100000;
    }
  sort_int (list, 2, 1, 0,A->nseq-1);

  mid=A->nseq/2;
  for (a=0; a<mid; a++)
    {
      l1[0][nl1[0]++]=list[a][0];
    }
  for (a=mid; a<A->nseq; a++)
    {
      l2[0][nl2[0]++]=list[a][0];
    }

  free_int (list, -1);
  return 1;
}
/*
 * Splits the sequences of A into two halves according to their similarity
 * to sequence `seq` (matrix returned by aln2sim_mat(A, "idmat")).
 *
 * Sequences are ranked by sort_int_inv on their similarity to `seq` (the
 * reference itself is given 100); the first A->nseq/2 of the ranking go to
 * l1[0] and the rest to l2[0]. Both lists are allocated here (A->nseq ints
 * each) and nl1[0]/nl2[0] receive their sizes. Always returns 1.
 *
 * NOTE(review): the matrix returned by aln2sim_mat is not released here
 * because its ownership is not visible from this function.
 */
int aln2jack_group1 (Alignment *A, int seq, int **l1, int *nl1, int **l2, int *nl2)
{
  int **sim;
  int **list;
  int a, mid;

  list=declare_int ( A->nseq, 3);
  l1[0]=vcalloc (A->nseq, sizeof (int));
  l2[0]=vcalloc (A->nseq, sizeof (int));
  /* Both counters must start from 0: they are used as write cursors below
     (the former `nl1[0]=nl2[0];` left them at whatever the caller passed). */
  nl1[0]=nl2[0]=0;

  sim=aln2sim_mat (A, "idmat");
  for ( a=0; a< A->nseq; a++)
    {
      list[a][0]=seq;
      list[a][1]=a;
      list[a][2]=(a==seq)?100:sim[seq][a];
    }
  sort_int_inv (list, 3, 2, 0, A->nseq-1);
  fprintf ( stderr, "\nJacknife fromsequence %s [%d]\n", A->name[seq], seq);

  mid=A->nseq/2;
  for (a=0; a< mid; a++)
    l1[0][nl1[0]++]=list[a][1];
  for (a=mid; a<A->nseq; a++)
    l2[0][nl2[0]++]=list[a][1];

  /* The ranking table is local scratch space, as in aln2jack_group2 */
  free_int (list, -1);
  return 1;
}
  
      
/*
 * Restricts an alignment A and its SAR table S to the sequences named in SUB.
 *
 * The indices (in A) of the SUB names found in A are collected, subA[0]
 * receives extract_sub_aln(A, ...) and subS[0] is obtained by rotating S,
 * extracting the same indices and rotating back. The names of S are then
 * copied onto subS[0]. Always returns 0.
 *
 * NOTE(review): the index list and the two intermediate alignments are not
 * released here; whether the helpers keep references to them is not
 * visible from this function - confirm before adding frees.
 */
int sarset2subsarset ( Alignment *A, Alignment *S, Alignment **subA, Alignment **subS, Alignment *SUB)
{
  Alignment *by_sequence, *selected;
  int *kept;
  int n_kept=0;
  int i, idx;

  kept=vcalloc ( SUB->nseq, sizeof (int));
  for (i=0; i<SUB->nseq; i++)
    {
      idx=name_is_in_list(SUB->name[i], A->name, A->nseq, 100);
      if ( idx==-1)continue;
      kept[n_kept]=idx;
      n_kept++;
    }

  subA[0]=extract_sub_aln (A, n_kept, kept);

  by_sequence=rotate_aln (S, NULL);
  selected=extract_sub_aln (by_sequence, n_kept, kept);
  subS[0]=rotate_aln (selected, NULL);

  for (i=0; i<S->nseq; i++)
    {
      sprintf ( (subS[0])->name[i], "%s", S->name[i]);
    }

  return 0;
}

/*
 * Scores every alignment column against every SAR row by multiplying
 * pairwise terms over all sequence pairs (b,c), b<c.
 *
 * result/iresult are static and declared only on the first call with the
 * dimensions SAR->nseq x A->len_aln x 3; later calls reuse them.
 * Returns iresult, where iresult[a][b][0]=100*log(1-result[a][b][0]).
 * `mode` is not used.
 *
 * NOTE(review): s==100 in the first branch or s==0 in the second makes the
 * factor a division by zero, and log() receives a non-positive argument
 * whenever result>=1 - confirm the intended value ranges.
 */
int ***simple_sar_analyze_vot ( Alignment *A, Alignment *SAR, char *mode)
{
  int a, b, c, d;
  int res1, res2, sar1, sar2;
  float s;
  int **sim;
  static float ***result;
  static int ***iresult;
  /* Buffers are declared once and kept across calls */
  if (!result)
    {
    result=declare_arrayN (3,sizeof (float),SAR->nseq, A->len_aln,3);
    iresult=declare_arrayN (3,sizeof (int),SAR->nseq, A->len_aln,3);
    }

  sim=aln2sim_mat (A, "idmat");
  
  
  /* Every product starts from 1 */
  for (a=0; a<SAR->nseq; a++)
    for (b=0; b<A->len_aln; b++)
      result[a][b][0]=1;
 
  /* a: SAR row, (b,c): pair of sequences, d: alignment column */
  for ( a=0; a< SAR->nseq; a++)
    for ( b=0; b<A->nseq-1; b++)
      for ( c=b+1; c< A->nseq; c++)
      for ( d=0; d<A->len_aln; d++)
        {
          res1=A->seq_al[b][d];
          res2=A->seq_al[c][d];

          /* 1 when the sequence is flagged 'I' in this SAR row */
          sar1=(SAR->seq_al[a][b]=='I')?1:0;
          sar2=(SAR->seq_al[a][c]=='I')?1:0;
          
          /* similarity of the two sequences, read from the "idmat" matrix */
          s=sim[b][c];
          
          
          
          
          /* different activity and different residue */
          if ( sar1!=sar2 && res1!=res2)
            result[a][d][0]*=(1/(100-s));
          
          /* both flagged 'I' and same residue */
          else if ( sar1==sar2 && sar1==1 && res1==res2)
            result[a][d][0]*=1/s;
          
          
          

          /*
          else if ( sar1==sar2 && res1==res2)result[a][d][0]+=(100-s)*(100-s);
          else if ( sar1==sar2 && res1!=res2)result[a][d][0]-=s*s;
          else if ( sar1!=sar2 && res1==res2)result[a][d][0]-=(100-s)*(100-s);
          */
          
          /* field 1 of the float buffer is set to the character code of 'a' */
          result[a][d][1]='a';
        }
  /* Trace every product on stderr and convert it to the integer output */
  for ( a=0; a<SAR->nseq; a++)
    for ( b=0; b<A->len_aln; b++)
      {
      fprintf ( stderr, "\n%f", result[a][b][0]);
      iresult[a][b][0]=100*log(1-result[a][b][0]);
      }
  return iresult;
}


/*
 * Scores every column of inA against every SAR row.
 *
 * The alignment is rotated so that each column becomes a string; each SAR
 * row is compared with each column through sar_vs_seq1 (sar_mode is fixed
 * to 1 here; modes 2 and 3 select sar_vs_seq2 / sar_vs_seq3).
 * result[row][column][0] accumulates score*10 and result[row][column][1]
 * receives the residue reported by the scoring function.
 *
 * result is static: it is declared on the first call only, with the
 * dimensions SAR->nseq x inA->len_aln x 3, and returned on every call.
 * `mode` is not used.
 */
int ***simple_sar_analyze_col ( Alignment *inA, Alignment *SAR, char *mode)
{
  static int ***result;
  Alignment *columns;
  int **sim;
  double score=0;
  int row, col;
  int sar_mode=1;
  char aa;

  if (result==NULL)
    {
      result=declare_arrayN (3,sizeof (int),SAR->nseq, inA->len_aln, 3);
    }

  sim=aln2sim_mat (inA, "idmat");
  columns=rotate_aln (inA, NULL);

  for (row=0; row<SAR->nseq; row++)
    {
      fprintf ( stderr, "[%d/%d]", row, SAR->nseq);
      for (col=0; col<columns->nseq; col++)
        {
          switch (sar_mode)
            {
            case 3:
              score=sar_vs_seq3(SAR->seq_al[row], columns->seq_al[col],100, sim, &aa);
              break;
            case 2:
              score=sar_vs_seq2(SAR->seq_al[row], columns->seq_al[col],100, sim, &aa);
              break;
            default:
              score=sar_vs_seq1(SAR->seq_al[row], columns->seq_al[col],100, sim, &aa);
              break;
            }

          result[row][col][0]+=score*10;
          result[row][col][1]=aa;
        }
    }

  return result;
}



/*
 * Scores one alignment column (seq) against one SAR row (sar), trying every
 * letter 'a'..'z' that occurs in the column, and returns the best score.
 *
 * For a candidate letter, a position counts as "matching" when the "idmat"
 * matrix entry for (residue, letter) is positive; positions where sar is
 * 'o' are left out of the counts. The counts are passed to
 * evaluate_sar_score1. best_aa[0] receives the winning letter, or '-' when
 * no letter scores above 0.
 *
 * Returns 0 without touching best_aa when the gap fraction of the column
 * exceeds gl. `sim` is not used. The matrix and the letter table are
 * static and created on first use.
 */
double sar_vs_seq1 ( char *sar, char *seq, float gl, int **sim, char *best_aa)
{
  static int **mat;
  static int *seen;
  double gap_fraction=0;
  double current=0, top_score=0;
  int len, pos, letter;

  if (!mat)mat=read_matrice ("idmat");

  len=strlen (sar);
  for (pos=0; pos<len; pos++)
    gap_fraction+=is_gap(seq[pos]);
  gap_fraction/=len;

  if (gap_fraction>gl) return 0;

  /* Flag the letters present in the column; flags are cleared on exit */
  if (!seen)seen=vcalloc (256, sizeof(int));
  for (pos=0; pos<len; pos++)seen[tolower(seq[pos])]=1;

  best_aa[0]='-';
  for (letter='a'; letter<'a'+26; letter++)
    {
      int n_used=0, n_both=0, n_msa=0, n_sar=0;

      if (!seen[letter])continue;

      for (pos=0; pos<len; pos++)
        {
          int hit, active;
          int residue=tolower(seq[pos]);

          if (residue=='-')hit=0;
          else hit=(mat[residue-'A'][letter-'A']>0)?1:0;

          if ( sar[pos]=='o')continue;
          active=(sar[pos]=='I')?1:0;

          n_msa+=hit;
          n_sar+=active;
          if (hit && active)n_both++;
          n_used++;
        }

      if (n_both)current=evaluate_sar_score1 ( n_used, n_both, n_msa, n_sar);
      else current=0;

      if ( current>top_score)
        {
          best_aa[0]=letter;
          top_score=current;
        }
    }

  for (pos=0; pos<len; pos++)seen[tolower(seq[pos])]=0;

  return top_score;
}

/*
 * Correlation between residue conservation and activity for one column.
 *
 * A residue is "active" when it occurs at a position where sar is 'I'.
 * Each position is then classified by (residue is active, sar is 'I') and
 * the returned percentage depends on the proportion of 'I' in sar:
 *   ratio < 0.2 : 100*N11/(N11+N10+N01)
 *   ratio > 0.8 : 100*N00/(N00+N10+N01)
 *   otherwise   : 100*(N11+N00)/N
 * (integer divisions, 0 when the denominator is 0).
 *
 * Returns 0 when the gap fraction of seq exceeds gl; when gl<1 the gap
 * fraction is multiplied by 100 before the comparison. `sim` and `best_aa`
 * are not used.
 */
double sar_vs_seq4 ( char *sar, char *seq, float gl, int **sim, char *best_aa)
{
  int N11, Nmsa, Nsar, N, N10, N01, N00;
  int a, b, r, s, den;
  double Ng=0, ratio, result=0;
  int *aa;

  N=strlen (sar);
  for (a=0; a<N; a++)
    Ng+=is_gap(seq[a]);
  Ng/=N;
  if (gl<1)Ng*=100;

  if (Ng>gl) return 0;

  /* Index through unsigned char so that bytes above 127 stay in range */
  aa=vcalloc ( 256, sizeof (int));
  for (b=0; b<N; b++)
    {
      if (sar[b]=='I')aa[(unsigned char)seq[b]]=1;
    }

  N11=N10=N01=N00=Nmsa=Nsar=0;
  for (b=0; b<N; b++)
    {
      r=aa[(unsigned char)seq[b]];
      s=(sar[b]=='I')?1:0;

      Nmsa+=r; Nsar+=s;
      N11+=(r && s)?1:0;
      N01+=(!r &&s)?1:0;
      N10+=(r && !s)?1:0;
      N00+=(!r && !s)?1:0;
    }

  /* The table is local scratch space: release it on every path */
  vfree (aa);

  ratio=(float)Nsar/(float)N;

  if (ratio<0.2)
    {
      /*Sparse matrix full of 0s*/
      den=N11+N10+N01;
      if (den)result=(100*N11)/den;
    }
  else if (ratio>0.8)
    {
      /*Sparse matrix full of 1s*/
      den=N00+N10+N01;
      if (den)result=(100*N00)/den;
    }
  else
    {
      /*Average matrix*/
      if (N)result=(100*(N11+N00))/N;
    }
  return result;
}

/*
 * Scores one column (seq) against one SAR row (sar), treating every residue
 * seen at an 'I' position as a single "active" symbol.
 *
 * The number of positions, of positions that are both active-residue and
 * 'I', of active-residue positions and of 'I' positions are passed to
 * evaluate_sar_score1. Returns 0 when there is no position of the second
 * kind or when the gap fraction of seq exceeds gl. `sim` and `best_aa` are
 * not used.
 */
double sar_vs_seq3 ( char *sar, char *seq, float gl, int **sim, char *best_aa)
{
  int *active_residue;
  int len, pos;
  int n_both=0, n_msa=0, n_sar=0;
  double gap_fraction=0;
  double score=0;

  len=strlen (sar);
  for (pos=0; pos<len; pos++)
    gap_fraction+=is_gap(seq[pos]);
  gap_fraction/=len;

  if (gap_fraction>gl) return 0;

  /* First pass: flag every residue found at an 'I' position */
  active_residue=vcalloc ( 256, sizeof (int));
  for (pos=0; pos<len; pos++)
    {
      if (sar[pos]=='I')active_residue[(int)seq[pos]]=1;
    }

  /* Second pass: count the agreements */
  for (pos=0; pos<len; pos++)
    {
      int in_set=active_residue[(int)seq[pos]];
      int is_active=(sar[pos]=='I')?1:0;

      n_msa+=in_set;
      n_sar+=is_active;
      if (in_set && is_active)n_both++;
    }

  if (n_both)score=evaluate_sar_score1 ( len, n_both, n_msa, n_sar);
  else score=0;

  vfree (active_residue);
  return score;
}

/*
 * Similarity-weighted variant of the column/SAR score.
 *
 * For every letter 'a'..'z' present in seq, all ordered pairs of positions
 * (b,c) are visited; each pair is described by the 4-digit pattern
 * r1 s1 r2 s2 (r: residue equals the letter, s: sar is 'I') and adds
 * weighted amounts to the counters, the weight being sim_mat[b][c]/10 or
 * 10 minus that value depending on the pattern. The counters are passed to
 * evaluate_sar_score1 and the largest score over the letters is returned.
 *
 * Returns 0 when the gap fraction of seq exceeds gl. `best_aa` is not used.
 * NOTE(review): strm is presumably a string-equality test; patterns that
 * are not listed below add nothing - confirm this is intended.
 */
double sar_vs_seq2 ( char *sar, char *seq, float gl, int **sim_mat, char *best_aa)
{
  double score=0, return_score=0;
  int L,N11, Nmsa, Nsar,N10, N01, N;
  int a, b,c,d, r1, s1,r2, s2, res;
  double Ng=0;
  int sim, diff, w;
  char string[5];

  /*Weighted E-Value Similarity*/
  L=strlen (sar);
  /* Ng: fraction of positions of seq for which is_gap is true */
  for (a=0; a<L; a++)
    Ng+=is_gap(seq[a]);
  Ng/=L;
  
  if (Ng>gl) return 0;
  for (a=0; a<26; a++)
    {

      N=Nmsa=Nsar=N11=N10=N01=0;
      res='a'+a;
      /* d: number of occurrences of the letter; absent letters are skipped */
      for (d=0,b=0; b<L; b++)d+=((tolower(seq[b]))==res)?1:0;
      if ( d==0) continue;
      
      for (b=0; b<L; b++)
      {
        r1=(tolower(seq[b])==res)?1:0;
        s1=(sar[b]=='I')?1:0;
        for ( c=0; c<L; c++)
          {
            r2=(tolower(seq[c])==res)?1:0;
            s2=(sar[c]=='I')?1:0;
          
            /* four 0/1 digits plus the terminator fit exactly in string[5] */
            sprintf ( string, "%d%d%d%d", r1,s1, r2, s2);
            sim= sim_mat[b][c]/10;
            diff=10-sim;
            
            if (strm (string, "0000"))      {w=diff;N+=2*w;}
            else if ( strm (string, "0011")){w=sim ;N+=2*w ; N11+=w  ;N10+=0   ;N01+=w   ;Nmsa+=w   ;Nsar+=w;}
            else if ( strm (string, "1010")){w=diff;N+=2*w ; N11+=0  ;N10+=2*w ;N01+=0   ;Nmsa+=2*w ;Nsar+=0;}
            else if ( strm (string, "0101")){w=diff;N+=2*w;  N11+=0  ;N10+=0   ;N01+=2*w ;Nmsa+=0   ;Nsar+=2*w;}
            else if ( strm (string, "1111")){w=diff;N+=2*w;  N11+=2*w;N10+=0   ;N01+=0   ;Nmsa+=2*w ;Nsar+=2*w;}
            else if ( strm (string, "1001")){w=sim; N+=2*w;  N11+=0  ;N10+=w   ;N01+=w   ;Nmsa+=w;Nsar+=w;}
            else if ( strm (string, "0110")){w=sim; N+=2*w;  N11+=0  ;N10+=w   ;N01+=w   ;Nmsa+=w;Nsar+=w;}
          }
      }
      /* score keeps its previous value when N11 is 0 for this letter */
      if (N11)
      {
       
        score=evaluate_sar_score1 ( N, N11, Nmsa, Nsar);
      }
      return_score=MAX(return_score, score);
    }
  if ( return_score <0)fprintf ( stderr, "\n%.2f", return_score);
  return return_score;
}
  
/*
 * Percentage of shared "on" positions between two SAR strings.
 *
 * A position is "on" when its character is anything but 'O'. The result is
 * 100 * both_on / (both_on + only_in_seq1 + only_in_seq2), or 0 when no
 * position is on in both strings. Positions 0..strlen(seq1)-1 are read
 * from both strings, so seq2 must be at least as long as seq1.
 */
float get_sar_sim (char *seq1, char *seq2)
{
  int both=0, only_first=0, only_second=0;
  int len, pos;

  len=strlen (seq1);
  for (pos=0; pos<len; pos++)
    {
      int first_on=(seq1[pos]!='O');
      int second_on=(seq2[pos]!='O');

      if (first_on && second_on)both++;
      else if (first_on)only_first++;
      else if (second_on)only_second++;
    }

  if (!both)return 0;
  return ((float)(both)*100)/(float)(both+only_first+only_second);
}
        

/*
 * Combines M_chooses_Nlog terms into a score for a 2x2 agreement table.
 *
 *   N      total number of observations
 *   n11    observations positive in both the alignment and the SAR
 *   n1msa  observations positive in the alignment
 *   n1sar  observations positive in the SAR
 *
 * Returns 0 when n11 is 0, otherwise
 *   -( [M(n1msa,N) + M(n1sar-n11,N-n1msa) + M(n11,n1msa)]
 *      - [M(n1msa,N) + M(n1sar,N)] )
 * with M = M_chooses_Nlog.
 * NOTE(review): M_chooses_Nlog is presumably the log of a binomial
 * coefficient - confirm against its definition.
 */
double evaluate_sar_score1 ( int N, int n11, int n1msa, int n1sar)
{
  double observed, reference;
  int n01;

  if (!n11)return 0;

  n01=n1sar-n11;

  observed =M_chooses_Nlog (n1msa, N) + M_chooses_Nlog (n01, N-n1msa) + M_chooses_Nlog (n11, n1msa);
  reference=M_chooses_Nlog (n1msa, N)+M_chooses_Nlog (n1sar, N);

  return -(observed-reference);
}
/*
 * Agreement score for a 2x2 table: the number of joint positives minus the
 * number of disagreements, n11 - ((n1msa-n11) + (n1sar-n11)).
 *
 * N is accepted for signature compatibility with evaluate_sar_score1 and
 * is not used. The statements that used to follow the return could never
 * run and have been removed.
 */
double evaluate_sar_score2 ( int N, int n11, int n1msa, int n1sar)
{
  (void)N;
  return n11-((n1msa-n11)+(n1sar-n11));
}


/*
 * Returns the hard-coded benchmark code (0, 1, 2 or 3) attached to a
 * position.
 *
 * The table is stored as runs of consecutive positions: `start` is the
 * first position of a run and `codes` holds one digit per position.
 * Positions outside every run - including negative values and values of
 * 1000 or more, which used to index outside the former 1000-entry array -
 * yield 0.
 */
int benchmark_sar( int value)
{
  static const struct
  {
    int start;
    const char *codes;
  } runs[]=
    {
      {  2, "12"},
      {  6, "2121112"},
      { 30, "21212112"},
      { 43, "212"},
      { 73, "2112"},
      { 80, "2121"},
      { 85, "211221212"},
      {103, "21112"},
      {130, "212111212"},
      {271, "212"},
      {281, "2121112"},
      {319, "2111121212"},
      {356, "2112"},
      {377, "212"},
      {386, "3"},
      {388, "2111222112"},
      {399, "212"},
      {414, "212"},
      {420, "2111212"}
    };
  int n_runs=(int)(sizeof (runs)/sizeof (runs[0]));
  int r, offset;

  if (value<0)return 0;

  for (r=0; r<n_runs; r++)
    {
      /* Runs are sorted by start: the value sits in a gap before this run */
      if (value<runs[r].start)return 0;

      offset=value-runs[r].start;
      if (offset<(int)strlen (runs[r].codes))return runs[r].codes[offset]-'0';
    }
  return 0;
}

/*
 * Predicts a SAR string for every sequence of A from a weight file and
 * prints the predictions on stdout in FASTA-like form, then exits.
 *
 * The weight file is read with file2list (separator " "). For each SAR
 * name, the rows whose field 1 equals that name and whose field 3 is a
 * positive integer are turned into (field 2 minus 1, first character of
 * field 5, field 3) triplets, terminated by a triplet whose first value is
 * -1. A sequence gets 'I' for a compound when seq2weighted_sar_score
 * exceeds `limit`, 'O' otherwise.
 *
 * Calls myexit(EXIT_SUCCESS) once the output is written; the final return
 * is only there for the signature.
 */
Alignment *weight2sar (Alignment *A, Alignment *SAR, char *weight_file, int limit)
{
  int ***weight;
  char ***rows;
  int cmp, seq, r, n;
  float score;

  weight=vcalloc (SAR->nseq, sizeof (int**));
  rows=file2list (weight_file, " ");

  for (cmp=0; cmp<SAR->nseq; cmp++)
    {
      /* First pass: count the usable rows of this compound */
      for (n=0, r=0; rows[r]; r++)
        {
          if ( strm (rows[r][1], SAR->name[cmp]) && atoi (rows[r][3])>0)n++;
        }

      weight[cmp]=declare_int (n+1, 3);
      fprintf ( stderr, "\n%s %d", SAR->name[cmp], n);

      /* Second pass: store them, followed by the -1 terminator */
      for (n=0, r=0; rows[r]; r++)
        {
          if ( strm (rows[r][1], SAR->name[cmp]) && atoi (rows[r][3])>0)
            {
              weight[cmp][n][0]=atoi(rows[r][2])-1;
              weight[cmp][n][1]=rows[r][5][0];
              weight[cmp][n][2]=atoi (rows[r][3]);
              n++;
            }
        }
      weight[cmp][n][0]=-1;
    }

  for (seq=0; seq<A->nseq; seq++)
    {
      fprintf ( stdout, ">%s\n", A->name[seq]);
      for (cmp=0; cmp<SAR->nseq; cmp++)
        {
          score=seq2weighted_sar_score(A->seq_al[seq], weight[cmp]);
          if (score>limit)fprintf ( stdout, "%c", 'I');
          else fprintf ( stdout, "%c", 'O');
        }
      fprintf (stdout, "\n");
    }
  myexit (EXIT_SUCCESS);
  return A;
}
  
/*
 * Renames every sequence of A to "<activity>_<old name>_<compound>", where
 * <activity> is the character stored for that sequence in the SAR row of
 * `compound`. A is returned unchanged when the compound is not in SAR.
 *
 * The old name is copied into a buffer sized from its length instead of a
 * fixed 100-byte array.
 * NOTE(review): the capacity of A->name[a] is not visible here; the
 * lengthened name is assumed to fit - confirm against the Alignment
 * allocation.
 */
Alignment *display_sar ( Alignment *A, Alignment *SAR, char *compound)
{
  int a,c;
  char *name;

  c=name_is_in_list ( compound, SAR->name, SAR->nseq, 100);
  if ( c==-1)return A;

  for ( a=0; a< A->nseq; a++)
    {
      name=vcalloc (strlen (A->name[a])+1, sizeof (char));
      sprintf (name, "%s", A->name[a]);
      sprintf ( A->name[a], "%c_%s_%s", SAR->seq_al[c][a], name,compound);
      vfree (name);
    }
  return A;
}
/*
 * Writes, in the comment of every sequence of A, its weighted SAR score for
 * `compound` together with its known reactivity.
 *
 * The weight file is read with file2list (separator " "). Rows whose field
 * 1 equals `compound` (and is not "TOTPOS") become (field 2 minus 1, first
 * character of field 5, field 3) triplets terminated by a triplet whose
 * first value is -1. The reactivity is the character stored in the SAR row
 * of the compound, or 'U' when SAR is NULL or the compound is not in it.
 *
 * Returns A. The weight table is released before returning.
 * NOTE(review): the list returned by file2list is not released because its
 * ownership is not visible from this function.
 */
Alignment *aln2weighted_sar_score ( Alignment *A,Alignment *SAR, char *weight_file, char *compound)
{
  int a, b, c=0;
  int **weight;
  int score;
  char reactivity;
  char ***list;

  if ( SAR)
    {
      c=name_is_in_list (compound, SAR->name, SAR->nseq, 100);
    }

  list=file2list (weight_file, " ");

  /* Upper bound on the number of triplets: every row of this compound */
  for (a=0, b=0; list[a]; a++)
    {
      if (strm (list[a][1], compound))b++;
    }
  weight=declare_int ( b+1, 3);

  for (a=0, b=0; list[a]; a++)
    {
      if ( !strm (list[a][1], compound) || strm ("TOTPOS", list[a][1]));
      else
        {
          weight[b][0]=atoi(list[a][2])-1;
          weight[b][1]=list[a][5][0];
          weight[b][2]=atoi(list[a][3]);
          b++;
        }
    }
  weight[b][0]=-1;

  for ( a=0; a< A->nseq; a++)
    {
      score=seq2weighted_sar_score (A->seq_al[a], weight);
      reactivity=(!SAR || c==-1)?'U':SAR->seq_al[c][a];

      sprintf (A->seq_comment[a], "Compound %-15s Reactivity %c SAR_SCORE %5d", compound,reactivity, (int) score);
    }

  /* The table is local scratch space */
  free_int (weight, -1);
  return A;
}

/*
 * Sums the weights of the (position, residue, weight) triplets matched by
 * a sequence.
 *
 * weight is a list of 3-int rows terminated by a row whose first value is
 * -1. A triplet contributes its weight when the lower-cased character of
 * seq at `position` equals `residue` and is not a gap. Positions that fall
 * outside the sequence are ignored instead of being read.
 */
float seq2weighted_sar_score ( char *seq, int **weight)
{
  int a, p, r, w, len;
  float score=0;

  len=strlen (seq);
  for (a=0; weight[a][0]!=-1; a++)
    {
      p=weight[a][0];
      r=weight[a][1];
      w=weight[a][2];

      /* Positions come from a weight file: never index outside seq */
      if ( p<0 || p>=len)continue;

      if ( is_gap(seq[p]))continue;
      if ( tolower(seq[p])==r)score+=w;
    }
  return score;
}

/*
 * Measures how often two sequences share activity ('I') for a compound,
 * over all pairs and over the pairs whose similarity reaches L1, prints
 * the statistics on stdout and exits.
 *
 * sim_ref is the similarity matrix of A ("idmat"); sim is the same matrix,
 * or - unless posfile is "all" - the matrix of the sub-alignment returned
 * by extract_aln3(copy of A, posfile). For every pair (a,b) and every SAR
 * row selected by `compound` (or all rows when compound is "all"), the
 * counters tot[0][..][0] accumulate over all pairs and tot[0][..][1] over
 * the pairs with sim[a][b]>=L1.
 *
 * Every ratio is guarded against an empty denominator: 1 for SP and SN, 0
 * for the average similarity. L2 is not used.
 * NOTE(review): the statements after myexit(0) only run if myexit returns,
 * which is not visible from this function.
 */
Alignment * sar2simpred (Alignment *A, Alignment *SAR, char *posfile, char *compound, int L1,int L2 )
{
  int a, b, c, c1, c2;
  int **sim, **sim_ref, npred=0;
  float n11, n10, n01;
  float sn, sp, avg_sim;

  int tot_sim=0;
  int N11=1, N01=2, N10=3, NXX=4, SIM=5;
  float ***tot;
  int i1, i2;

  (void)L2;

  n11=n10=n01=0;
  tot=declare_arrayN(3,sizeof (float), 10, 6, 2);

  sim_ref=aln2sim_mat (A, "idmat");
  if (strm (posfile, "all"))
    sim=sim_ref;
  else
    {
      Alignment *B;
      B=copy_aln ( A,NULL);
      B=extract_aln3(B,posfile);
      sim=aln2sim_mat (B, "idmat");
    }

  for (a=0; a< A->nseq-1; a++)
    {
      for ( b=a+1; b< A->nseq; b++)
        {
          for ( c=0; c<SAR->nseq; c++)
            {
              if ( (strm (compound, SAR->name[c]) || strm ( compound, "all")))
                {
                  i1=0; /* single bin: binning on sim_ref is disabled */
                  i2=sim[a][b];

                  c1=(SAR->seq_al[c][a]=='I')?1:0;
                  c2=(SAR->seq_al[c][b]=='I')?1:0;

                  n11=(c1 && c2)?1:0;
                  n01=(!c1 && c2)?1:0;
                  n10=(c1 && !c2)?1:0;

                  tot[i1][N11][0]+=n11;
                  tot[i1][N01][0]+=n01;
                  tot[i1][N10][0]+=n10;
                  tot[i1][NXX][0]++;
                  tot[i1][SIM][0]+=sim_ref[a][b];

                  if ( i2>=L1)
                    {
                      tot[i1][N11][1]+=n11;
                      tot[i1][N01][1]+=n01;
                      tot[i1][N10][1]+=n10;
                      tot[i1][NXX][1]++;
                      tot[i1][SIM][1]+=sim_ref[a][b];
                    }
                }
            }
        }
    }

  for (a=0; a<1; a++)
    {
      /* Same empty-denominator convention as the ratios below */
      sp=((tot[a][N11][0]+tot[a][N10][0])==0)?1:(tot[a][N11][0])/(tot[a][N11][0]+tot[a][N10][0]);
      fprintf ( stdout, "\n%15s N11 %5d SP %.2f ",compound, (int)tot[a][N11][0],sp);
      sp=((tot[a][N11][1]+tot[a][N10][1])==0)?1:(tot[a][N11][1])/(tot[a][N11][1]+tot[a][N10][1]);
      sn=(tot[a][N11][0]==0)?1:(tot[a][N11][1]/tot[a][N11][0]);
      avg_sim=(tot[a][NXX][1]==0)?0:(tot[a][SIM][1]/tot[a][NXX][1]);
      fprintf ( stdout, " N11 %5d SP %.2f SN %.2f SIM %.2f", (int)tot[a][N11][1], sp,sn, avg_sim);
    }

  myexit (0);
  sp=((n11+n01)==0)?1:n11/(n11+n01);
  /* the guard now tests the denominator that is actually used */
  sn=((n11+n10)==0)?1:n11/(n11+n10);

  fprintf ( stdout, "\nLimit: %d NPRED %d AVGSIM %d SN %.2f   SP %.2f TP %d FP %d FN %d",L1, npred, tot_sim, sn, sp, (int)n11, (int)n01, (int)n10);
  myexit (EXIT_SUCCESS);
  return A;
}

/*
 * Predicts the activity of every sequence of A for `compound` from its
 * similarity to a set of reference sequences, prints the predictions and a
 * summary line on stdout, then exits.
 *
 * Reference sequences (seqlist):
 *   "first"        first sequence flagged 'I' in each SAR row named compound
 *   "all"          every sequence of A
 *   a name of A    that single sequence
 *   anything else  read as an alignment file; its names found in A are used
 *
 * For each non-reference sequence (or every sequence with "all"), the
 * similarities >= L to active and to inactive references are summed
 * separately; the sign of the difference gives the prediction. Output
 * letters: I/O correct prediction, i/o wrong prediction, U/u undecided,
 * Z/z reference sequence (upper case when the sequence is flagged 'I').
 *
 * Similarities come from A ("idmat") or, unless posfile is "all", from the
 * sub-alignment returned by extract_aln3(copy of A, posfile).
 *
 * An unknown compound is reported on stderr and ends the run with
 * EXIT_FAILURE; with an empty reference set the per-sequence averages are
 * printed as 0.
 */
Alignment * sar2simpred2 (Alignment *A, Alignment *SAR, char *seqlist, char *posfile, char *compound, int L )
{
  int a,b, c,c1, c2, p, s;
  float n11, n10, n01, n00, n, sn2, prediction,sp, n1, n0, entropy, Delta;
  int *rlist, *tlist, *pred, *npred, tsim, psim;
  int **sim, **sim_ref;
  int nr=0;
  int nrs;
  char *out;
  int delta_max;
  Alignment *B;
  int printall=1;

  /* Every later access indexes SAR->seq_al[c]: refuse an unknown compound */
  c=name_is_in_list ( compound, SAR->name, SAR->nseq, 100);
  if ( c==-1)
    {
      fprintf ( stderr, "\nERROR: compound %s not found in the SAR set\n", compound);
      myexit (EXIT_FAILURE);
      return A;
    }

  out=vcalloc (A->nseq+1, sizeof (char));
  rlist=vcalloc ( A->nseq, sizeof (int));
  tlist=vcalloc ( A->nseq, sizeof (int));
  pred=vcalloc(2, sizeof (int));
  npred=vcalloc(2, sizeof (int));

  /* rlist[0..nrs-1]: reference sequences, tlist[x]: x is a reference */
  nrs=0;
  if ( strm (seqlist, "first"))
    {
      for ( a=0; a<SAR->nseq; a++)
        {
          if ( strm ( compound, SAR->name[a]))
            {
              for ( b=0; b<A->nseq; b++)
                {
                  if ( SAR->seq_al[a][b]=='I')
                    {
                      fprintf ( stderr, "COMP: %s REF SEQ: %s\n", A->name[b], compound);
                      rlist[nrs]=b;
                      tlist[rlist[nrs]]=1;
                      nrs++;
                      break;
                    }
                }
            }
        }
    }
  else if (strm (seqlist, "all"))
    {
      for ( a=0; a< A->nseq; a++)
        {
          rlist[nrs]=a;
          tlist[a]=1;
          nrs++;
        }
    }
  else if ((a=name_is_in_list ( seqlist, A->name, A->nseq, 100))!=-1)
    {
      rlist[nrs]=a;
      tlist[rlist[nrs]]=1;
      nrs++;
    }
  else
    {
      Alignment *R;
      R=main_read_aln (seqlist, NULL);
      for (a=0; a<R->nseq; a++)
        {
          b=name_is_in_list( R->name[a], A->name, A->nseq, 100);
          /* Skip names unknown to A (index -1) and duplicates, so that
             rlist/tlist stay within their A->nseq slots, and count the
             references actually stored. */
          if ( b==-1 || tlist[b])continue;
          rlist[nrs]=b;
          tlist[b]=1;
          nrs++;
        }
      free_aln (R);
    }

  sim_ref=aln2sim_mat (A, "idmat");
  if (strm (posfile, "all"))
    {
      sim=sim_ref;
      B=A;
    }
  else
    {
      B=copy_aln ( A,NULL);
      B=extract_aln3(B,posfile);
      sim=aln2sim_mat (B, "idmat");
    }

  n11=n10=n01=n00=n=n1=n0=0;
  delta_max=0;
  for (a=0; a<A->nseq; a++)
    {
      if ( tlist[a] && !strm (seqlist, "all"))
        out[a]=(SAR->seq_al[c][a]=='I')?'Z':'z';
      else
        {
          pred[0]=pred[1]=0;
          npred[0]=npred[1]=1;
          c1=(SAR->seq_al[c][a]=='I')?1:0;
          for (nr=0,tsim=0,psim=0,b=0; b<nrs; b++)
            {
              /* references marked 'o' are ignored */
              if ( SAR->seq_al[c][rlist[b]]=='o');
              else
                {
                  c2=(SAR->seq_al[c][rlist[b]]=='I')?1:0;
                  nr+=c2;
                  s=sim[a][rlist[b]];
                  tsim+=sim_ref[a][rlist[b]];
                  psim+=sim[a][rlist[b]];
                  if (s>=L)
                    {
                      pred[c2]+=s;
                      npred[c2]++;
                    }
                }
            }

          if (c1==0)n0++;
          else n1++;

          Delta=pred[1]-pred[0];

          if (Delta<-delta_max){p=0;out[a]= (c1==0)?'O':'o';}
          else if (Delta>delta_max){p=1;out[a]=(c1==1)?'I':'i';}
          else {p=-1; out[a]=(c1==1)?'U':'u';}

          if ( p==-1);
          else if (  p &&  c1)n11++;
          else if (  p && !c1)n10++;
          else if ( !p && !c1)n00++;
          else if ( !p &&  c1)n01++;

          if (p!=-1)n++;
          /* nrs may be 0 (no reference found): avoid the integer division */
          if (printall)fprintf ( stdout, ">%-15s %d %c OVERALL_SIM:%d POSITION_SIM %d\n%s\n", B->name[a], c1, out[a],(nrs)?tsim/nrs:0,(nrs)?psim/nrs:0,B->seq_al[a]);
        }
    }
  sp=((n11+n10)==0)?1:n11/(n11+n10);
  sn2=((n1)==0)?1:n11/n1;
  prediction=(n11+n00)/(n1+n0);
  entropy=(float)(M_chooses_Nlog (nr, nrs)/M_chooses_Nlog(nrs/2, nrs));

  fprintf ( stdout, ">%-15s Sp %.2f  Sn %.2f Pred %.2f E %.2f\n", compound,sp, sn2,prediction,entropy );
  fprintf ( stdout, "%s\n", out);

  myexit (EXIT_SUCCESS);
  return A;
}
/*********************************COPYRIGHT NOTICE**********************************/
/* Centre National de la Recherche Scientifique (CNRS) */
/*and */
/*Cedric Notredame */
/*Fri Oct 26 17:03:04     2007. */
/*All rights reserved.*/
/*This file is part of T-COFFEE.*/
/**/
/*    T-COFFEE is free software; you can redistribute it and/or modify*/
/*    it under the terms of the GNU General Public License as published by*/
/*    the Free Software Foundation; either version 2 of the License, or*/
/*    (at your option) any later version.*/
/**/
/*    T-COFFEE is distributed in the hope that it will be useful,*/
/*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
/*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
/*    GNU General Public License for more details.*/
/**/
/*    You should have received a copy of the GNU General Public License*/
/*    along with Foobar; if not, write to the Free Software*/
/*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
/*...............................................                                                                                      |*/
/*  If you need some more information*/
/*  cedric.notredame@europe.com*/
/*...............................................                                                                                                                                     |*/
/**/
/**/
/*    */
/*********************************COPYRIGHT NOTICE**********************************/

Generated by  Doxygen 1.6.0   Back to index