Logo Search packages:      
Sourcecode: t-coffee version File versions  Download package

util_dp_gotoh_sw.c

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <stdarg.h>


#include "io_lib_header.h"
#include "util_lib_header.h"
#include "define_header.h"

#include "dp_lib_header.h"

int gotoh_pair_wise_lalign ( Alignment *A, int*ns, int **l_s,Constraint_list *CL)
    {
    Alignment *BUF=NULL;
    Alignment *EA=NULL;
    
    int a;
    BUF=copy_aln (A, BUF);
    
    
    for ( a=0; a<CL->lalign_n_top; a++)
      {
      free_aln (A);

      A=copy_aln (BUF, A);
      
      A->score_aln=gotoh_pair_wise_sw (A, ns, l_s, CL);
      EA=fast_coffee_evaluate_output (A, CL);

      output_format_aln (CL->out_aln_format[0],A,EA,"stdout");
      CL=undefine_sw_aln ( A, CL);
      }
    exit (1);
    return 0;
    }
Constraint_list * undefine_sw_aln ( Alignment *A, Constraint_list *CL)
  {
    int a, b, l;
    int **pos;
    int  r1, rs1;
    int  r2, rs2;
    


    pos=aln2pos_simple ( A,A->nseq);
    
    for ( l=0; l< A->len_aln; l++)
      for ( a=0; a< A->nseq-1; a++)
      {
        rs1=A->order[a][0];
        r1 =pos[a][l];
            
        if ( r1<=0)continue;
        for ( b=a+1; b< A->nseq;b++)
              {
                rs2=A->order[b][0];
                r2 =pos[b][l];
                if ( r2<=0)continue;
                
                CL=undefine_sw_pair ( CL, rs1, r1, rs2, r2);
              }
      }
    free_int (pos, -1);
    return CL;
  }
Constraint_list * undefine_sw_pair ( Constraint_list *CL, int s1, int r1, int s2, int r2)
  {
    int a, b;
    
    if ( !CL->forbiden_pair_list)
      {
      
      CL->forbiden_pair_list=vcalloc ( (CL->S)->nseq, sizeof (int ***));
      for ( a=0; a< ((CL->S)->nseq); a++)
        {
          CL->forbiden_pair_list[a]=vcalloc ( (CL->S)->nseq, sizeof (int **));
          for ( b=0; b< ((CL->S)->nseq); b++)
            CL->forbiden_pair_list[a][b]=vcalloc ( (CL->S)->len[a]+1, sizeof (int *));
          
        }
      }
    if ( CL->forbiden_pair_list[s1][s2][r1]==NULL)CL->forbiden_pair_list[s1][s2][r1]=vcalloc ( (CL->S)->len[s2]+1, sizeof (int));
    CL->forbiden_pair_list[s1][s2][r1][r2]=1;
    
    if ( CL->forbiden_pair_list[s2][s1][r2]==NULL)CL->forbiden_pair_list[s2][s1][r2]=vcalloc ( (CL->S)->len[s1]+1, sizeof (int));
    CL->forbiden_pair_list[s2][s1][r2][r1]=1;
    
    return CL;
  }
       
int sw_pair_is_defined ( Constraint_list *CL, int s1, int r1, int s2, int r2)
        {
        int d;
        
        d=(r1-r2);
        d=(d<0)?-d:d;
        

        if ( s1==s2 && d<(CL->sw_min_dist)) return UNDEFINED;
        else if ( ! CL->forbiden_pair_list) return 1;
        else if ( CL->forbiden_pair_list[s1][s2][r1]==NULL)return 1;
        else if ( CL->forbiden_pair_list[s1][s2][r1][r2]==1)return UNDEFINED;
        else if ( CL->forbiden_pair_list[s1][s2][r1][r2]==0)return 1;
        
        else 
          {
            crash ("ERROR in function: sw_pair_is_defined\n");
            return UNDEFINED;
          }
          
      }     

  
int gotoh_pair_wise_sw (Alignment *A, int*ns, int **l_s,Constraint_list *CL)
      {
/*******************************************************************************/
/*                SMITH AND WATERMAN                                           */
/*                                                                             */
/*    makes DP between the the ns[0] sequences and the ns[1]                 */
/*                                                                             */
/*    for MODE, see the function  get_dp_cost                                */
/*******************************************************************************/
      int a, b, d, i, j;
      int last_i, last_j;
      int t;
      int *cc;
      int *dd, *ddg;
      int lenal[2], len;
      
      int c,s, e,eg, ch,g,h, maximise;
      int sub;
      
      int fop;
      static int **pos0;
      
      char **al=NULL;
      char **aln=NULL;

      
      int ala, alb,LEN;
      char *buffer;
      char *char_buf;
      
/*trace back variables       */
      int best_i;
      int best_j;
      int best_score;
      
      
      FILE       *long_trace=NULL;
      TRACE_TYPE *buf_trace=NULL;
      static TRACE_TYPE **trace;
      TRACE_TYPE k;
      TRACE_TYPE *tr;
      int long_trace_flag=0;
      int dim;
/********Prepare penalties*******/
      if (CL->moca)
        {
          g=(CL->gop+(CL->moca)->moca_scale)*SCORE_K;
          h=(CL->gep+(CL->moca)->moca_scale)*SCORE_K;
        }
      else
        {
          g=(CL->gop)*SCORE_K;
          h=(CL->gep)*SCORE_K;
        }
      maximise=CL->maximise;
/********************************/  
/*CLEAN UP AFTER USE*/
      if ( A==NULL)
         {
         free_int (trace,-1);
         trace=NULL;
         if ( al)free_char (al,-1);
         al=NULL;
         return 0;
         }     
/*DO MEMORY ALLOCATION FOR SW DP*/
      
      lenal[0]=strlen (A->seq_al[l_s[0][0]]);
      lenal[1]=strlen (A->seq_al[l_s[1][0]]);
      len= (( lenal[0]>lenal[1])?lenal[0]:lenal[1])+1;
      buf_trace=vcalloc ( len, sizeof (TRACE_TYPE));  
      buffer=vcalloc ( 2*len, sizeof (char));   
      al=declare_char (2, 2*len);  
      
      char_buf= vcalloc (2*len, sizeof (char)); 
      dd= vcalloc (len, sizeof (int));
      cc= vcalloc (len, sizeof (int));
      ddg=vcalloc (len, sizeof (int));
      
      
      if ( len>=MAX_LEN_FOR_DP)
          {     
          long_trace_flag=1;
          long_trace=vtmpfile();       
          }
      else
          {
          dim=(trace==NULL)?0:read_size_int ( trace,sizeof (int*));
          trace    =realloc_int ( trace,dim,dim,len-dim, len-dim);
          }
      
/*END OF MEMORY ALLOCATION*/
      
      
            /*
            0(s)   +(dd)
              \      |
               \     |
                \    |
                 \   |
                  \  |
                   \ |
                    \|
            -(e)----O
            */ 
                   
      
      pos0=aln2pos_simple ( A,-1, ns, l_s);     

      

      cc[0]=0;

      best_score=0;
      best_i=0;
      best_j=0;
      
      tr=(long_trace_flag)?buf_trace:trace[0];
      tr[0]=(TRACE_TYPE)UNDEFINED;

      t=g;
      for ( j=1; j<=lenal[1]; j++)
            {
            cc[j]=t=t+h;
            dd[j]=t+g;
            tr[j]=(TRACE_TYPE)UNDEFINED;
            }
      if (long_trace_flag)fwrite (buf_trace, sizeof ( TRACE_TYPE),lenal[1]+1, long_trace);
      

      t=g;
      for (i=1; i<=lenal[0];i++)
                  {                  
                  tr=(long_trace_flag)?buf_trace:trace[i];
                  s=cc[0];
                  cc[0]=c=t=t+h;
                  e=t+g;
                  tr[0]=(TRACE_TYPE)UNDEFINED;

                  for (eg=0,j=1; j<=lenal[1];j++)
                        {
                        
                        sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL);
                        
                        /*get the best Insertion*/
                        if ( a_better_than_b ( e, c+g, maximise))
                              eg++;
                        else 
                              eg=1; 
                        e=best_of_a_b (e, c+g, maximise)+h;
                        
                        /*Get the best deletion*/
                        if ( a_better_than_b ( dd[j], cc[j]+g, maximise))
                              ddg[j]++;
                        else 
                              ddg[j]=1;
                        dd[j]=best_of_a_b( dd[j], cc[j]+g, maximise)+h;
                        
                        /*Chose Substitution for tie breaking*/
                        if ( sub!=UNDEFINED)c=best_int(4,maximise,&fop, e, (s+sub), dd[j],0);
                        else 
                           {
                           c=0;
                           fop=3;
                           dd[j]=e=0;
                           eg=ddg[j]=0;
                           }

                        if ( c>best_score)
                           {
                           best_i=i;
                           best_j=j;
                           best_score=c;
                           }
                        fop-=1;
                        s=cc[j];
                        cc[j]=c;                  
                        
                  
                        if ( fop==-1)
                              {tr[j]=(TRACE_TYPE)fop*eg;
                              }
                        else if ( fop==1)
                                {tr[j]=(TRACE_TYPE)fop*ddg[j];
                              }
                        else if (fop==0)
                              {tr[j]=(TRACE_TYPE)0;   
                              }     
                        else if ( fop==2)
                                {
                              tr[j]=(TRACE_TYPE)UNDEFINED;
                              }
                        
                        fop= -2;
                        }
                  if (long_trace_flag)
                      {
                      fwrite ( buf_trace, sizeof (TRACE_TYPE), lenal[1]+1, long_trace);
                      }
                  }
      
      


      if (best_i==0 ||best_j==0 )
          {
          vfree (buf_trace);
          vfree (buffer);
          free_char ( al,-1);
          vfree ( char_buf);
          vfree ( dd);
          vfree ( cc);        
          vfree ( ddg);
          free_int (pos0, -1);
          A->len_aln=0;
          aln=A->seq_al;
          
          for ( c=0; c< 2; c++)
              {
            for ( a=0; a< ns[c]; a++) 
                {
                aln[l_s[c][a]][0]='\0';
                }
            }
          if ( long_trace_flag)fclose ( long_trace);
          
          return UNDEFINED;
          }
      else
          {
          i=last_i=best_i;
          j=last_j=best_j;
          }
      ala=alb=0;
      
      
      while (i>0 && j>0)
                  {
                  if ( i==0 || j==0)k=UNDEFINED;
                    /*                    k=-1;
                  else if ( j==0)
                        k=1;
                  else if ( j==0 && i==0)
                  k=1;*/      
                  else
                          {
                        if (long_trace_flag)
                           {
                           fseek ( long_trace, sizeof (TRACE_TYPE)*((lenal[1]+1)*(i)+j),SEEK_SET);
                           fread ( &k, sizeof (TRACE_TYPE), 1, long_trace);
                           }
                        else
                           {
                           
                           k=trace[i][j];
                           }
                        }
                        
                  if ( k==UNDEFINED){i=j=0;}    
                  if (k==0)
                        {
                        
                        al[0][ala++]=1;
                        al[1][alb++]=1;
                           
                  
 
                        i--;
                        j--;
                        last_i=i;
                        last_j=j;
                        
                        }
                  else if (k==(TRACE_TYPE)UNDEFINED)
                          {
                        i=0;
                        j=0;
                        
                        }
                  else if (k>0)
                        {
                        
                        for ( a=0; a< k; a++)
                              {
                              al[0][ala++]=1;
                              al[1][alb++]=0;
                              i--;
                              }
                        last_i=i;
                        last_j=j;
                        }
                  else if (k<0)
                        {
                        
                        for ( a=0; a>k; a--)
                              {
                              al[0][ala++]=0;
                              al[1][alb++]=1;
                              j--;
                              }
                        last_i=i;
                        last_j=j;
                        }
                  }
      
      LEN=ala;    
      c=LEN-1;  
      
      

      invert_list_char ( al[0], LEN);
      invert_list_char ( al[1], LEN);
      
      if ( A->declared_len<=LEN)realloc_alignment  ( A, 2*LEN);   

      
      aln=A->seq_al;
      
      
      for ( c=0; c<2; c++)
          for ( a=0; a<ns[c]; a++)
              {
            e=(c==0)?last_i:last_j;
            for ( d=0; d<e; d++)
                {
                A->order[l_s[c][a]][1]+=1-is_gap(aln[l_s[c][a]][d]);
                }
            }
      
                
      for ( c=0; c< 2; c++)
          {
          for ( a=0; a< ns[c]; a++) 
            {
            aln[l_s[c][a]]+=(c==0)?last_i:last_j;
            ch=0;
            for ( b=0; b< LEN; b++)
                {
               
                if (al[c][b]==1)
                  char_buf[b]=aln[l_s[c][a]][ch++];
                else
                  char_buf[b]='-';
               }
            char_buf[b]='\0';
            aln[l_s[c][a]]-=(c==0)?last_i:last_j;
            sprintf (aln[l_s[c][a]],"%s", char_buf);
              }
           }
      
      
      A->len_aln=LEN;
      
      free_int (pos0, -1);
      vfree ( cc);
      vfree (dd);       
      vfree (ddg);
      vfree (buffer);
      vfree (char_buf); 
      vfree (buf_trace);
      if ( long_trace_flag)fclose (long_trace); 
      
            
      return best_score;
      }


/*******************************************************************************/
/*                AUTOMATIC GEP+SCALE PENALTY FOR SW                           */
/*                                                                             */
/*                                                                           */
/*                                                                             */
/*                                                                           */
/*******************************************************************************/

double compute_penalty   (Constraint_list *CL, char *mode, int len)
     {


     double p;
     int **bin;
     int coor;
     int n;

     bin=bin_list (CL, WE,0);
     n=read_size_int ( bin,sizeof (int*));
     p=return_max_coor_int(bin, n, 3, &coor);     
     
     
     p=bin[coor][3];
     free_int (bin, -1);
     
     return p;
     }



double compute_scale ( Constraint_list *CL,char *mode, int len)
     {
     double p;
     int n;
     int **bin;
     int coor;

    
     bin=bin_list ( CL,WE,0);
     n=read_size_int ( bin,sizeof (int*)); 
     p=return_max_coor_int(bin, n, 3, &coor);     
     p=bin[1][3];
  
     free_int (bin, -1);
     return p;
          
     }
       




int evaluate_penalty (Alignment *A, Constraint_list *CL, int *scale,char *scale_mode, int *penalty, char *penalty_mode, int len_seq)
    {
    Constraint_list *NCL;

    if ( scale[0]!=0 && penalty[0]!=0)return 1;
    else
        {
      if (CL->M)
          {
          scale[0]=0;
          penalty[0]=-0.5*SCORE_K;
          }
      else
          {
          
          NCL=duplicate_constraint_list ( CL);
          NCL=mask_list_with_aln_pair ( A, 0, (A==NULL)?0:A->len_aln,NCL, UNDEFINED);
          scale[0]=compute_scale ( NCL, scale_mode, len_seq)*-SCORE_K;      
          penalty[0]=compute_penalty ( NCL, penalty_mode, len_seq)*-SCORE_K;  
          free_constraint_list (NCL);   
          }
      return 1;
      }
    }



Alignment * add_seq2aln   (Constraint_list *CL, Alignment *IN,Sequence  *S)
           {      
         int *n_groups;
         int **group_list;
         int a;
         static int series=0;
        
        
        
        
         int ste; /*sequence to extract, last one if they are packed*/





         if (CL->packed_seq_lu){ste=S->nseq-1;}
         else{ste=0;}

         if ( IN==NULL)
            {
            IN=realloc_aln2(IN, 1, strlen (S->seq[ste])+1);
            IN->S=S;
            IN->nseq=1;
            
            
            
            sprintf ( IN->seq_al[0], "%s", S->seq[ste]);
            sprintf (IN->name[0], "%s_%d_1", S->name[ste],series);
            IN->order[0][0]=ste;
            IN->order[0][1]=0;
            
            IN->len_aln=strlen ( IN->seq_al[0]);
            series++;
          
            }        
         else
            {
            
            IN=realloc_aln2 ( IN, IN->nseq+1,MAX(strlen ( S->seq[ste])+1, IN->len_aln+1));
            n_groups=vcalloc ( 2, sizeof (int));
            group_list=declare_int (2,IN->nseq+1);
            
            n_groups[0]=IN->nseq;
            for ( a=0; a<IN->nseq; a++)group_list[0][a]=a;
            
            n_groups[1]=1;
            group_list[1][0]=IN->nseq;
            sprintf (IN->name[IN->nseq], "%s_%d_%d",S->name[ste],series,IN->nseq+1);
            sprintf (IN->seq_al[IN->nseq], "%s",S->seq[ste]);
            IN->order[IN->nseq][0]=ste;
            IN->order[IN->nseq][1]=0;
            IN->nseq++;
            
           
            pair_wise ( IN, n_groups, group_list,CL); 
            
            }
        
         return IN;
            
         }
         

/*********************************COPYRIGHT NOTICE**********************************/
/* Centre National de la Recherche Scientifique (CNRS) */
/*and */
/*Cedric Notredame */
/*Tue May 10 12:08:44     2005. */
/*All rights reserved.*/
/*This file is part of T-COFFEE.*/
/**/
/*    T-COFFEE is free software; you can redistribute it and/or modify*/
/*    it under the terms of the GNU General Public License as published by*/
/*    the Free Software Foundation; either version 2 of the License, or*/
/*    (at your option) any later version.*/
/**/
/*    T-COFFEE is distributed in the hope that it will be useful,*/
/*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
/*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
/*    GNU General Public License for more details.*/
/**/
/*    You should have received a copy of the GNU General Public License*/
/*    along with Foobar; if not, write to the Free Software*/
/*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
/*...............................................                                                                                      |*/
/*  If you need some more information*/
/*  cedric.notredame@europe.com*/
/*...............................................                                                                                                                                     |*/
/**/
/**/
/*    */
/*********************************COPYRIGHT NOTICE**********************************/

Generated by  Doxygen 1.6.0   Back to index