Logo Search packages:      
Sourcecode: t-coffee version File versions  Download package

pavie_dp.c

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <stdarg.h>
#include <string.h>

#include "io_lib_header.h"
#include "util_lib_header.h"
#include "define_header.h"
#include "dp_lib_header.h"


static double mc_delta_matrix ( int ***mat1, int ***mat2, char **alp, int nch);
static double delta_matrix ( int **mat1,int **mat2, char *alp);
static double ***pavie_seq2pavie_fmat (Sequence *S,double *gop, double *gep, char **mat, char *idmat, int id_threshold, int sample_size, int nch, char *param  );
static int **pavie_fmat2pavie_logodd_mat (double **fmat, char *alp);
static double **pavie_aln2fmat(Alignment *A, double **fmat, char *idmat, int id_threshold, int ch, int nch, char *param);
static int pavie_mat2pavie_id_mat ( int **mat,char *in_name, char *alp, char *ignore, char *force,int T, char *out_name);
static double paviemat2gep ( int **mat, char *alp);
static Alignment *align_pavie_sequences (char *seq0,char *seq1,char **mat,double *gop,double *gep,int nch);
static int pavie_score (char *s0,int p0, char *s1,int p1,char **mat_file, double *gop, double *gep, int nch, float factor);
static char **seq2pavie_alp (Sequence *S, int nch);
static Sequence * seq2pavie_seq ( Sequence *S, int nch);
static FILE* output_pavie_aln (Alignment *A, int nch, FILE *fp);
static char **output_pavie_mat_list ( int ***current_mat, double *gep, char **alp, int nch,char *prefix,int cycle, char **mat_name);
static float pavie_aln2id ( Alignment *A, int mode);
static char * vstrstr ( char *string, char *token);
static int check_pavie_cl ( char *string);

Sequence * pavie_seq2random_seq ( Sequence *S, char *subst)
{
  int a, b, r, l;

  
  vsrand (0);
  r=subst[0]; subst++;
  l=strlen (subst);
  for ( a=0; a< S->nseq; a++)
    for (b=0; b<S->len[a]; b++)
      if ( S->seq[a][b]==r)S->seq[a][b]=subst[rand()%l];
  return S;
}

float **pavie_seq2pavie_aln(Sequence *S,char *mat, char *mode)
{
  int a, b,c, nch=0;
  char **mat_list;

  double *gep, *gop;
  Alignment *A;
  char **alp;
  char *pavie_idmat;
  FILE *fp;
  float **dist_mat;
  float score;
  
  check_pavie_cl (mode);
  
  mat_list=declare_char (100, 100);
  
  if ( is_matrix (mat))
    {
      sprintf ( mat_list[nch++], "%s", mat);
    }
  else
    {
      fp=vfopen (mat,"r");
      while ( (c=fgetc(fp))!=EOF)
      {
        ungetc(c, fp);
        fscanf (fp, "%s\n",mat_list[nch++]);
      }
      vfclose (fp);
    }
  
  alp=seq2pavie_alp (S, nch);
  S=seq2pavie_seq (S, nch);

  gop=vcalloc (nch, sizeof (double));
  gep=vcalloc (nch, sizeof (double));
 
  for ( a=0; a< nch; a++)
    {
      int **m;
      char *st;
      int v;
      m=read_matrice (mat_list[a]);
      if (st=vstrstr(mode, "_GEP"))
      {
        sscanf ( st, "_GEP%d_", &v);
        gep[a]=v*-1;
      }
      else
            {
        gep[a]=(int)m['a'-'A']['a'-'A'];
      }
      free_int (m, -1);
    }
  
  
  pavie_idmat=vtmpnam(NULL);
  pavie_mat2pavie_id_mat (NULL,"idmat", alp[0],"X","",1,pavie_idmat);
  dist_mat=declare_float ( S->nseq, S->nseq);
  

  for ( a=0; a< S->nseq-1; a++)
    {
      for ( b=a+1; b< S->nseq; b++)
        {
        A=align_pavie_sequences (S->seq[a],S->seq[b],mat_list,gop,gep,nch);
        sprintf ( A->name[0], "%s", S->name[a]);
        sprintf ( A->name[1], "%s", S->name[b]);
        if (strm (mode, "_ID01_"))A->score=score=pavie_aln2id (A, 1);
        else if ( vstrstr (mode, "_ID02_"))A->score=score=pavie_aln2id (A, 2);
        else if ( vstrstr (mode, "_ID04"))A->score=score=pavie_aln2id (A, 4);
        else if ( vstrstr (mode, "_ID05"))A->score=score=pavie_aln2id (A, 5);
        
        else A->score=score=pavie_aln2id (A, 1);
        
        if ( vstrstr (mode, "_MATDIST_"))    dist_mat[a][b]=dist_mat[b][a]=(100-score)*100;
        else if ( vstrstr (mode, "_MATSIM_"))dist_mat[a][b]=dist_mat[b][a]=(score)*100;
        
        
        if ( !vstrstr (mode, "_MAT") )
          {
            fprintf ( stdout, "#############\nAlignment %s %s: %d %% ID SCORE %d \n", S->name[a], S->name[b], A->score, A->score_aln);
            output_pavie_aln (A,nch, stdout);
          }
        free_aln(A);
        }
    }

  if ( vstrstr (mode, "_MAT"))
    {
      for ( a=0; a<S->nseq; a++)
      {
        fprintf ( stdout, "\n%s ", S->name[a]);
        for ( b=0; b< S->nseq; b++)
          fprintf ( stdout, "%6d ", (int)dist_mat[a][b]);
      }
    }
  
  return NULL;
}


int **pavie_seq2trained_pavie_mat(Sequence *S, char *param)
{
  double ***fmat;
  int ***current_mat;
  int ***previous_mat;
  char **alp;

  char **mat_file;
  double d,delta_min=10;
  double *gep;
  double *gop;
  
  char ignore[100];
  char force [100];
  char pavie_idmat[100];
  int id_threshold;
  int sample_size;
  char *b;
  int a,n=0,nch=1;

  check_pavie_cl (param);
  
  if ( !param)param=vcalloc (1, sizeof (char));

  if ((b=vstrstr(param,"_THR")))sscanf ( b, "_THR%d_", &id_threshold);
  else id_threshold=0;
  
  if ((b=vstrstr(param,"_SAMPLE")))sscanf ( b, "_SAMPLE%d_", &sample_size);
  else sample_size=0;
  
  if ((b=vstrstr(param,"_CHANNEL")))sscanf ( b, "_CHANNEL%d_", &nch);
  else nch=1;
  

  /*Declare Arrays*/
  gep=vcalloc (nch, sizeof (double));
  gop=vcalloc (nch, sizeof (double));
  mat_file=declare_char ( nch, 100);
  current_mat =vcalloc ( nch, sizeof (double**));
  previous_mat=vcalloc ( nch, sizeof (double**));
  

  sprintf (ignore, "X");
  sprintf (force, "");
  sprintf ( pavie_idmat, "pavie_idmat");
  
  alp=seq2pavie_alp (S, nch);
  S=seq2pavie_seq (S, nch);
 
  pavie_mat2pavie_id_mat (NULL,"idmat", alp[0],ignore,force,1,pavie_idmat);
  
  for ( a=0; a<nch; a++)sprintf (mat_file[a], "idmat");
  fmat=pavie_seq2pavie_fmat ( S,gop,gep,mat_file,pavie_idmat, id_threshold, sample_size, nch, param);
  

  for (a=0; a<nch; a++)
    {
      current_mat[a]=pavie_fmat2pavie_logodd_mat(fmat[a], alp[a]);
      gep[a]=paviemat2gep(current_mat[a], alp[a]);      
    }
  free_arrayN((void*)fmat, 3);
  
  mat_file=output_pavie_mat_list ( current_mat,gep, alp, nch,"", n++, mat_file); 
  
  
  
  fprintf ( stdout, "\n");
  previous_mat=vcalloc ( nch, sizeof (int**));
  while ((d=mc_delta_matrix (previous_mat, current_mat, alp, nch))>delta_min)
    {

      fprintf ( stdout, "\nDelta=%d: ",(int) d);
      for (a=0; a<nch; a++)
      {
        free_int (previous_mat[a], -1);
        previous_mat[a]=current_mat[a];
      }
      fprintf ( stdout, "\n");
      
      fmat=pavie_seq2pavie_fmat (S,gop,gep,mat_file, pavie_idmat, id_threshold, sample_size, nch, param);      
      
     
      for (a=0; a< nch; a++)
      {
        current_mat[a]=pavie_fmat2pavie_logodd_mat(fmat[a], alp[a]);
        gep[a]=paviemat2gep(current_mat[a], alp[a]);
      }
      
      mat_file=output_pavie_mat_list ( current_mat,gep, alp, nch,"", n, mat_file); 
      free_arrayN((void*)fmat, 3);
      n++;
    }
  
  fprintf ( stdout, "\nDelta=%d Mat: ",(int) d);
  for (a=0; a<nch; a++)
      {
        fprintf ( stdout, "%s ",mat_file[a]);
      }
  fprintf ( stdout, "\n");
  
  return current_mat[0];
}

double mc_delta_matrix ( int ***mat1, int ***mat2, char **alp, int nch)
{
  int a;
  double delta=0;
  if ( !mat1 || !mat2) return 100000;
  for ( a=0; a< nch; a++)
    delta+=delta_matrix (mat1[a], mat2[a], alp[a]);
  return delta/nch;
}
      
double delta_matrix ( int **mat1,int **mat2, char *alp)
{
  int ns;
  double delta, v;
  int a, b;
  
  if ( mat1==NULL || mat2==NULL) return 100000;
  
  ns=strlen (alp);
  for (delta=0, a=0; a< ns; a++)
    for ( b=0; b< ns; b++)
      {
      v=mat1[alp[a]-'A'][alp[b]-'A']-mat2[alp[a]-'A'][alp[b]-'A'];
      delta+=v*v;
      }
  delta=sqrt(delta);
  
  return delta;
}

double ***pavie_seq2pavie_fmat (Sequence *S,double *gop, double *gep, char **mat, char *idmat, int id_threshold, int sample_size, int nch, char *param  )
{
  int a, b, chan;
  double ***fmat=NULL;
  Alignment *A;



  fmat=vcalloc ( nch, sizeof (double **));
  if (sample_size==0)
    {
      for ( a=0; a< S->nseq-1; a++)
      {
        output_completion ( stderr,a+1,S->nseq,1, "");
        for ( b=a+1; b< S->nseq; b++)
          {
            
            A=align_pavie_sequences (S->seq[a],S->seq[b],mat,gop,gep,nch);
            
            for ( chan=0; chan< nch; chan++)
            {
             
              fmat[chan]=pavie_aln2fmat (A, fmat[chan], idmat, id_threshold, chan, nch, param);
            }
            free_aln (A);
          }
      }
    }
  else 
    {
      int tot=0, c;
      static int **list;

      if (!list)
      {
        list=declare_int (sample_size+1, 2);
        vsrand(0);
        tot=0;
        while (tot<sample_size)
          {
            a=rand()%(S->nseq);b=rand()%(S->nseq);
            if ( a!=b)
            {
              list[tot][0]=a;list[tot][1]=b;
              tot++;
            }
          }
      }
      for (tot=0,c=0; c<sample_size; c++, tot++)
      {
        a=list[c][0];b=list[c][1];
        A=align_pavie_sequences (S->seq[a],S->seq[b],mat,gop,gep,nch);
        for (chan=0; chan< nch; chan++)
          fmat[chan]=pavie_aln2fmat (A, fmat[chan], idmat, id_threshold,chan, nch, param);
       
        free_aln (A);
        output_completion ( stderr,tot,sample_size,1, "");
      }
    }
  return fmat;
}



int **pavie_fmat2pavie_logodd_mat (double **fmat, char *alp)
{
  char s1, s2,S1, S2;
  double r1, r2;
  int **mat;
  int a, b;
  int ns;

  ns=strlen (alp);
  mat=declare_int (256, 256);
  
  for (a=0; a<ns; a++)
    for (b=0; b<ns; b++)
      {
      s1=tolower(alp[a]);S1=toupper(alp[a]);
      s2=tolower(alp[b]);S2=toupper(alp[b]);

      

      r1=(fmat[s1][s2]+1)/(fmat[s1][s1]+1);
      r2=(fmat[s2][s1]+1)/(fmat[s2][s2]+1);
      mat[s1-'A'][s2-'A']=(int)(((log(r1)+log(r2))/2)*PAVIE_MAT_FACTOR);
      mat[S1-'A'][S2-'A']=(int)(((log(r1)+log(r2))/2)*PAVIE_MAT_FACTOR);
      mat[S1-'A'][s2-'A']=(int)(((log(r1)+log(r2))/2)*PAVIE_MAT_FACTOR);
      mat[s1-'A'][S2-'A']=(int)(((log(r1)+log(r2))/2)*PAVIE_MAT_FACTOR);
      
      }
  return mat;
}
      
double **pavie_aln2fmat(Alignment *A, double **fmat, char *idmat, int id_threshold, int ch, int nch, char *param)
{
  int a;
  int c1, c2;
  int w;
  int l,start;
  
  l=(A->len_aln/nch);
  start=l*ch;
  A->len_aln=l;
  A->seq_al[0]+=start;
  A->seq_al[1]+=start;
  


  if ( fmat==NULL)fmat=declare_double(300, 300);
  
  if (  vstrstr (param, "_TWE00_"))w=100;
  else if ( vstrstr (param, "_TWE01_"))w=pavie_aln2id (A, 1);
  else if ( vstrstr (param, "_TWE02_"))w=pavie_aln2id (A, 2);
  else if ( vstrstr (param, "_TWE03_"))w=pavie_aln2id (A, 3);
  else if ( vstrstr (param, "_TWE04_"))w=pavie_aln2id (A, 4);
  else if ( vstrstr (param, "_TWE05_"))w=pavie_aln2id (A, 5);
  else w=pavie_aln2id (A, 3);
      
  if (w<id_threshold) 
    {
      A->len_aln*=nch;
      A->seq_al[0]-=start;A->seq_al[1]-=start;
      return fmat;
    }
  else
    {
      for ( a=0; a<A->len_aln; a++)
      {
        c1=tolower(A->seq_al[0][a]);
        c2=tolower(A->seq_al[1][a]);
        fmat[c1][c2]+=w;
      }
      A->len_aln*=nch;
      A->seq_al[0]-=start;A->seq_al[1]-=start;
      
      return fmat;
    }
}

int pavie_mat2pavie_id_mat ( int **mat,char *in_name, char *alp, char *ignore, char *force,int T, char *out_name)
{
  int n1, n2, n3;
  int s1, s2, S1, S2;
  int a, b;
  int **idmat;
  
  if      (mat==NULL && in_name==NULL) return 0;
  else if (mat==NULL)
    {
      mat=read_matrice (in_name);
    }
  

  idmat=declare_int ( 256, 256);
  n1=strlen (alp);
  n2=strlen (ignore);
  n3=strlen (force);
  
  for (a=0; a< n1; a++)
    for ( b=0; b<n1; b++)
      {
      s1=tolower(alp[a])-'A';S1=toupper(alp[a])-'A';
      s2=tolower(alp[b])-'A';S2=toupper(alp[b])-'A';
      idmat[s1][s2]=idmat[s1][S2]=idmat[S1][S2]=idmat[S1][s2]=(mat[s1][s2]>=T)?PAVIE_MAT_FACTOR:0;
      }
  for (a=0; a<n3; a++)
    for (b=0; b<n1; b++)
      {
      s1=tolower(force[a])-'A';S1=toupper(force[a])-'A';
      s2=tolower(alp[b])-'A';S2=toupper(alp[b])-'A';
      idmat[s1][s2]=idmat[s1][S2]=idmat[S1][S2]=idmat[S1][s2]=PAVIE_MAT_FACTOR;
      }

  for (a=0; a<n2; a++)
    for (b=0; b<n1; b++)
      {
      s1=tolower(ignore[a])-'A';S1=toupper(ignore[a])-'A';
      s2=tolower(alp[b])-'A';S2=toupper(alp[b])-'A';
      idmat[s1][s2]=idmat[s1][S2]=idmat[S1][S2]=idmat[S1][s2]=0;
      }


  

  output_pavie_mat (idmat, out_name, 0, alp);
  free_int (idmat, -1);
  return 1;
}
double paviemat2gep ( int **mat, char *alp)
{
  int a, b, l;
  int n=0;
  double gep=0;
  l=strlen ( alp);
  
  for (a=0; a<l-1; a++)
    for ( b=a+1; b< l; b++)
      {
      gep+=mat[alp[a]-'A'][alp[b]-'A'];
      n++;
      }
  gep/=n;
 
  return gep;
  
}

Alignment *align_pavie_sequences (char *seq0,char *seq1,char **mat,double *gop,double *gep,int nch)
{
  double **F; int **T;
  Alignment *A;
  int XL, YL, len;
  int i, j, a, b, c;
  double match, gap_inX, gap_inY, MXY=1, GX=2, GY=3;
  
  char *ax, *ay;
  char *bufx, *bufy, *buf;
  char *x, *y;
  
  float factor=0.5;

  /*FActor
    terminal gaps are set to gep=gep*factor
  */
  
  x=seq0;
  y=seq1;
  XL=strlen (x)/nch;
  YL=strlen (y)/nch;
  
  
  ax=vcalloc ( (YL+XL)*nch+1, sizeof (char));
  ay=vcalloc ( (YL+XL)*nch+1, sizeof (char));
  bufx=vcalloc ( (YL+XL)*nch+1, sizeof (char));
  bufy=vcalloc ( (YL+XL)*nch+1, sizeof (char));
  
  F=declare_double (XL+2, YL+2);
  T=declare_int (XL+2, YL+2);
  
 
  /*Fill stage*/
  F[0][0] = 0;
  for(i = 1; i <=XL; i++) 
    {


      F[i][0] = F[i-1][0]+pavie_score (x,i-1, NULL,GAP_CODE,mat, gop, gep, nch, factor) /*CL->M[x[i-1]-'A'][gap]*/;
      T[i][0] = GY;
    }
  
  for(j = 1; j <= YL; j++) 
    {

      F[0][j] = F[0][j-1]+pavie_score (NULL,GAP_CODE,y, j-1,mat, gop, gep, nch, factor)/*CL->M[y[j-1]-'A'][gap]*/;
      T[0][j] = GX;
    }

  
  for(i = 1; i <= XL; i++) 
    {
      for(j = 1; j <= YL; j++) 
      {
        
        match  = F[i-1][j-1] + /*CL->M[x[i-1]-'A'][y[j-1]-'A']*/pavie_score (x,i-1,y, j-1,mat, gop, gep, nch, 1);
        gap_inY= F[i-1][j] + /*CL->M[x[i-1]-'A'][gap]*/         pavie_score (x,i-1, NULL,GAP_CODE,mat, gop, gep, nch, (j==YL)?factor:1); 
        gap_inX= F[i][j-1] +  /*+ CL->M[y[j-1]-'A'][gap]*/      pavie_score (NULL,GAP_CODE,y, j-1,mat, gop, gep, nch, (i==XL)?factor:1);
      
        if ( match >= gap_inY && match >=gap_inX){F[i][j]=match; T[i][j]=MXY;}
        else if ( gap_inX>=gap_inY){F[i][j]=gap_inX; T[i][j]=GX;}
        else {F[i][j]=gap_inY; T[i][j]=GY;}
      }
    }
  /*Trace back stage*/
  

  i = XL; 
  j = YL; 
  len=0;
  while(!(i==0 && j==0)) 
    {
      
      if   (T[i][j]==MXY) 
      {
        ax[len]=1;i--;
        ay[len]=1;j--;
      }
      else if ( T[i][j]==GY)
      {
        ax[len]=1;i--;
        ay[len]='-';
      }
      else if ( T[i][j]==GX)
      {
        ax[len]='-';
        ay[len]=1;j--;
      }
      len++;
    }
  
  for (a=0; a<len; a++)
    for (b=1; b<nch; b++)
      {
      ax[a+len*b]=ax[len];
      ay[a+len*b]=ay[len];
      }
  len=len*nch;
  ax[len]='\0';
  ay[len]='\0';

 
  sprintf ( bufx, "%s", ax);
  sprintf ( bufy, "%s", ay);
  
  for (a=1;a<nch; a++)
    {
      strcat (ax, bufx);
      strcat (ay, bufy);
    }
  
  buf=ax;ax=invert_string (ax);vfree(buf);
  buf=ay;ay=invert_string (ay);vfree(buf);
  
  
  A=declare_aln2 (2,strlen(ax));
  A->len_aln=strlen (ax);
  A->nseq=2;
  A->score=A->score_aln=F[XL][YL];
  
  for (a=0, b=0, c=0; a<A->len_aln; a++)
    {
      if (ax[a]==1)ax[a]=seq0[b++];
      if (ay[a]==1)ay[a]=seq1[c++];
    }
  

  
  sprintf ( A->seq_al[0], "%s", ax);
  sprintf ( A->seq_al[1], "%s", ay);
  
  vfree (ax); vfree(ay);vfree (bufx); vfree (bufy);free_double(F, -1); free_int (T, -1);
  return A;
} 


int pavie_score (char *s0,int p0, char *s1,int p1,char **mat_file, double *gop, double *gep, int nch, float factor)
  
  {
    static char *cmat;
    static int  ***mat;
    int l0, l1, c0, c1;
    int a, score=0;
    
    if ( !cmat || !mat_file || !strm (cmat, mat_file[0]))
      {
      if ( !cmat)cmat=vcalloc ( 100, sizeof (char));
      sprintf ( cmat, "%s", (mat_file)?mat_file[0]:"idmat");
      if ( !mat)mat=vcalloc ( nch, sizeof (int**));
      for ( a=0; a< nch; a++)
        {
          if ( mat[a])free_int (mat[a], -1);
          mat[a]=read_matrice ((mat_file)?mat_file[a]:"idmat");
          
        }
      }
    
    l0=(s0)?strlen (s0)/nch:0;
    l1=(s1)?strlen (s1)/nch:0;
    for ( a=0; a< nch; a++)
      {
      c0=(s0)?s0[l0*a+p0]-'A':p0;
      c1=(s1)?s1[l1*a+p1]-'A':p1;
      if ( c0==GAP_CODE)score+=(gep[a]!=0)?gep[a]:mat[a][c1][GAP_CODE];
      else if ( c1==GAP_CODE)score+=(gep[a]!=0)?gep[a]:mat[a][c0][GAP_CODE];
      else score+=mat[a][c0][c1];
      
      }
    score*=factor;
    return score;
  }
Sequence * seq2pavie_seq ( Sequence *S, int nch)
  {
    char *buf;
    int a, b;
    
    S->nseq/=nch;

    for (b=0; b<S->nseq; b++)
      {
      
      buf=vcalloc ((strlen (S->seq[b])*nch)+1, sizeof (char));
      for ( a=0; a< nch; a++)
        {
          strcat (buf, S->seq[b+(S->nseq)*a]);
          vfree ( S->seq[b+(S->nseq)*a]);
        }
      S->seq[b]=buf;
      }
    return S;
  }
char **seq2pavie_alp (Sequence *S, int nch)
  {
    int a, n;
    char **alp;
    
    n=S->nseq/nch;
    alp=vcalloc (nch, sizeof (char*));
    for ( a=0; a< nch; a++)
      {
      alp[a]=array2alphabet (S->seq+n*a, n);
      }
    return alp;
  }
FILE *output_pavie_aln (Alignment *A, int nch, FILE *fp)
{
  int a, b, c,d, l, start, end;
  Alignment *B;

  B=declare_aln2(A->nseq*nch+nch, A->len_aln);
  


  l=A->len_aln/nch;

  for ( a=0; a< nch; a++)
    {
      for (b=0; b< A->nseq; b++, B->nseq++)
      {
        sprintf (B->name[B->nseq], "%s.c%d", A->name[b], a);
        start=l*a;end=start+l;
        for (d=0,c=start; c<end; c++, d++)B->seq_al[B->nseq][d]=A->seq_al[b][c];
        B->seq_al[B->nseq][d]='\0';
      }
      if ( a!=nch-1)
      {
        sprintf (B->name[B->nseq], "");
        for ( b=0; b<l; b++)B->seq_al[B->nseq][b]='^';
        B->nseq++;
      }
    }
  
  B->len_aln=l;
  fp=output_Alignment_without_header (B,fp);
  free_aln (B);
  return fp;
  
}
char **output_pavie_mat_list ( int ***current_mat, double *gep, char **alp, int nch,char *prefix,int cycle, char **mat_name)
{
  int a;
  char mat_list_name[100];
  FILE *fp;
  
  
  sprintf ( mat_list_name, "pavie_matrix%s.cycle_%d.mat_list", prefix, cycle+1);
  fp=vfopen ( mat_list_name, "w");
  fprintf ( stderr, "\n\tOutput Pavie Matrix: %s", mat_list_name);
  for ( a=0; a< nch; a++)
    {
      sprintf ( mat_name[a], "pavie_matrix%s.ch_%d.cy_%d.pavie_mat", prefix,a+1, cycle+1);
      fprintf ( stderr, "\n\t  Channel %d Matrix: %s",a+1, mat_name[a]);
      output_pavie_mat (current_mat[a],mat_name[a],gep[a], alp[a]);
      fprintf ( fp, "%s\n", mat_name[a]);
    }
  vfclose (fp);
  return mat_name;
}


int pavie_pair_wise (Alignment *A,int *ns, int **l_s,Constraint_list *CL )
{
  double **F; int **T;
  char *x,*y;
  char *ax, *ay;
  int XL, YL, len;
  int i, j;
  double match, gap_inX, gap_inY, MXY=1, GX=2, GY=3;
  int gap=GAP_CODE;
  char *ix, *iy;
  float factor=0.5;


  /*factor:
    decreases terminal gap penalties with a factor X
    factor=1: terminal gap penalties <=> internal gap penalties
  */

  x=A->seq_al[l_s[0][0]];
  y=A->seq_al[l_s[1][0]];
  XL=strlen (x);
  YL=strlen (y);
  
  ax=vcalloc ( YL+XL+1, sizeof (char));
  ay=vcalloc ( YL+XL+1, sizeof (char));
  
  
  F=declare_double (XL+2, YL+2);
  T=declare_int (XL+2, YL+2);
  
 
  /*Fill stage*/
  F[0][0] = 0;
  for(i = 1; i <=XL; i++) 
    {

      F[i][0] = F[i-1][0]+(CL->M[x[i-1]-'A'][gap]*factor);
      T[i][0] = GY;
    }
  
  for(j = 1; j <= YL; j++) 
    {
      F[0][j] = F[0][j-1]+CL->M[y[j-1]-'A'][gap]*factor;
      T[0][j] = GX;
    }

  
  for(i = 1; i <= XL; i++) 
    {
      for(j = 1; j <= YL; j++) 
      {

        match  = F[i-1][j-1] + CL->M[x[i-1]-'A'][y[j-1]-'A'];
        gap_inY= F[i-1][j]   + (CL->M[x[i-1]-'A'][gap]*(j==YL)?factor:1);
        gap_inX= F[i][j-1]   + (CL->M[y[j-1]-'A'][gap]*(i==XL)?factor:1);
      
        if ( match >= gap_inY && match >=gap_inX){F[i][j]=match; T[i][j]=MXY;}
        else if ( gap_inX>=gap_inY){F[i][j]=gap_inX; T[i][j]=GX;}
        else {F[i][j]=gap_inY; T[i][j]=GY;}
      }
    }
  /*Trace back stage*/
  A->score=A->score_aln=F[XL][YL];
  
  i = XL; 
  j = YL; 
  len=0;
  while(!(i==0 && j==0)) 
    {
      
      if   (T[i][j]==MXY) 
      {
        ax[len]=x[--i];
        ay[len]=y[--j];
      }
      else if ( T[i][j]==GY)
      {
        ax[len]=x[--i];
        ay[len]='-';
      }
      else if ( T[i][j]==GX)
      {
        ax[len]='-';
        ay[len]=y[--j];
      }
      len++;
    }
  ax[len]='\0';
  ay[len]='\0';
  
  ix=invert_string (ax); iy=invert_string(ay);
  A=realloc_aln (A,len+1);
  
  sprintf ( A->seq_al[l_s[0][0]], "%s", ix);
  sprintf ( A->seq_al[l_s[1][0]], "%s", iy);
  A->nseq=2;
  A->len_aln=len;
  
  vfree (ax); vfree(ay);vfree(ix); vfree(iy); free_double(F, -1); free_int (T, -1);
  return A->score;
} 

float pavie_aln2id ( Alignment *A, int mode)
{
  int a, b, id=0, match=0, l1=0, l2=0, r1, r2, is_res1, is_res2;
  


  for (a=0; a<A->len_aln; a++)
    {
      r1=A->seq_al[0][a];
      r2=A->seq_al[1][a];
      
      
      is_res1=(!is_gap(r1) && r1!='x')?1:0;
      is_res2=(!is_gap(r2) && r2!='x')?1:0;

      l1+=is_res1;
      l2+=is_res2;
      
      
      if ( is_res1 && is_res2 )
      {
        match++;
        id+=(r1==r2)?1:0;
      }
    }


  if ( mode==1)return (match==0)?0:((id*100)/match);
  else if (mode ==2) return (A->len_aln==0)?0:((id*100)/A->len_aln);
  else if (mode ==3) return (MIN(l1,l2)==0)?0:((id*100)/(MIN(l1,l2)));
  else if (mode ==4) return (MAX(l1,l2)==0)?0:((id*100)/(MIN(l1,l2)));
  else if (mode ==5)return (A->score_aln * -1)/PAVIE_MAT_FACTOR;
  else
    {
      fprintf ( stderr, "\nUnknown Mode [pavie_aln2id:FATAL:%s]", PROGRAM);
      exit (EXIT_FAILURE);
      return EXIT_FAILURE;
    }

}
               
char * vstrstr ( char *in, char *token)
{
  if (!in || !token) return NULL;
  else return strstr (in, token);
}


int check_pavie_cl ( char *string)
{
  if ( !string || string[0]=='\0' ) return 1;
  else if (( string[0]!='_') ||string [strlen (string)-1]!='_')
    {
      fprintf ( stderr, "ERROR: parameters must satrt and finish with an underscore: _parameters_ [FATAL:%s]\n", PROGRAM);
      exit (EXIT_FAILURE);
    }
  return 1;
}
/*********************************COPYRIGHT NOTICE**********************************/
/* Centre National de la Recherche Scientifique (CNRS) */
/*and */
/*Cedric Notredame */
/*Tue May 10 12:08:44     2005. */
/*All rights reserved.*/
/*This file is part of T-COFFEE.*/
/**/
/*    T-COFFEE is free software; you can redistribute it and/or modify*/
/*    it under the terms of the GNU General Public License as published by*/
/*    the Free Software Foundation; either version 2 of the License, or*/
/*    (at your option) any later version.*/
/**/
/*    T-COFFEE is distributed in the hope that it will be useful,*/
/*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
/*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
/*    GNU General Public License for more details.*/
/**/
/*    You should have received a copy of the GNU General Public License*/
/*    along with Foobar; if not, write to the Free Software*/
/*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
/*...............................................                                                                                      |*/
/*  If you need some more information*/
/*  cedric.notredame@europe.com*/
/*...............................................                                                                                                                                     |*/
/**/
/**/
/*    */
/*********************************COPYRIGHT NOTICE**********************************/

Generated by  Doxygen 1.6.0   Back to index