/**
 * Copyright (c) 2011, Leena Salmela <leena.salmela@cs.helsinki.fi>
 * All rights reserved.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <limits.h>
#include <math.h>

#include "align.h"

// Smaller / larger of two values. These are plain macros: each argument is
// expanded twice, so never pass an expression with side effects (e.g. i++).
// MAX is not referenced in the visible part of this file.
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))

// NOTE(review): not referenced in the visible part of this file; the actual
// buffer / DP dimensions come from align_t in align.h -- confirm whether this
// constant is still needed.
#define MAX_ALIGN_LEN 10000

// Allocate one alignment workspace (sequence buffers + DP rows) on the heap.
//
// The object is created with a plain `new` expression, so the caller owns it
// and must release it with `delete`.
// NOTE(review): if align_t has no constructor its members start out
// indeterminate -- confirm against the definition in align.h.
align_t *init_align() {
  align_t *workspace = new align_t;
  return workspace;
}

/**
 * Write the reverse complement of a nucleotide string.
 *
 * Each character of `orig` is complemented (A<->T, C<->G, case preserved) and
 * stored mirrored in `reversed`, so reversed[0] is the complement of the last
 * character of `orig`. Any character other than A/C/G/T/a/c/g/t (including a
 * lower-case 'n') becomes an upper-case 'N'.
 *
 * @param orig      NUL-terminated input sequence; not modified.
 * @param reversed  Output buffer with room for strlen(orig) + 1 bytes. It must
 *                  not overlap `orig`: output positions are written before the
 *                  matching input positions at the far end have been read.
 */
void reverse_base(char *orig, char *reversed) {
    /* Measure the input once instead of calling strlen() on every loop test. */
    const size_t m = strlen(orig);
    size_t i;

    for (i = 0; i < m; i++) {
        char comp;

        switch (orig[i]) {
        case 'A': comp = 'T'; break;
        case 'C': comp = 'G'; break;
        case 'G': comp = 'C'; break;
        case 'T': comp = 'A'; break;
        case 'a': comp = 't'; break;
        case 'c': comp = 'g'; break;
        case 'g': comp = 'c'; break;
        case 't': comp = 'a'; break;
        default:  comp = 'N'; break;  /* unknown / ambiguous base */
        }

        /* Mirror the position so the output reads in the opposite direction. */
        reversed[m - i - 1] = comp;
    }
    reversed[m] = '\0';
}

// Is contig1 contained in contig2? The alignment should start in
// interval [start, end]
//
// Banded edit-distance test: all of contig1 is aligned against a stretch of
// contig2 whose first column lies in [start, end] (positions in contig2).
// 'N' / 'n' in either sequence matches any character.
//
// Parameters:
//   contig1, contig2   NUL-terminated sequences (copied with strcpy /
//                      reverse_base into the workspace buffers).
//   forward1, forward2 when zero, the reverse complement of that contig is
//                      used instead of the contig itself.
//   len1, len2         lengths used to size the DP; presumably strlen() of the
//                      respective contig -- confirm against callers.
//   start, end         allowed range for the alignment start in contig2;
//                      clamped to [0, len2].
//   align              scratch workspace providing buf1, buf2 and the two DP
//                      rows dp[0] (previous row) and dp[1] (current row).
//   max_err            allowed error rate; values outside [0, 1) are replaced
//                      by MAX_ERR.
//
// Returns 1 if some cell of the final row has
//   score <= max_err * MIN(len1, len2)   (the per-cell path lengths),
// otherwise 0 (also returned early once a whole row exceeds max_err * len1).
int is_contained(char *contig1, int forward1, int len1,
                 char *contig2, int forward2, int len2,
                 int start, int end, align_t *align, double max_err) {
  int i,j;
  int m1, m2;

  // Out-of-range error rates fall back to the project default.
  if (max_err < 0.0 || max_err >= 1.0)
    max_err = MAX_ERR;

  // Load both sequences into the workspace in the requested orientation.
  // NOTE(review): no bounds check here; buf1/buf2 must be able to hold the
  // contigs -- their sizes are declared in align.h, not visible in this file.
  if (!forward1) {
    reverse_base(contig1, align->buf1);
  } else {
    strcpy(align->buf1, contig1);
  }

  if (!forward2) {
    reverse_base(contig2, align->buf2);
  } else {
    strcpy(align->buf2, contig2);
  }

  if (start < 0)
    start = 0;
  if (end > len2)
    end = len2;

  // initialize dynamic programming array
  // m1: number of rows (characters of contig1).
  // m2: number of contig2 columns considered, counted from `start`: the width
  //     of the start window plus contig1's length stretched by the error
  //     rate, cut off at the end of contig2.
  m1 = len1;
  m2 = end-start+(1.0 + max_err)*len1;
  if (start + m2 > len2)
    m2 = len2-start;

  // Row 0: an alignment may begin at any of these columns at no cost.
  // NOTE(review): this zeroes columns 0..end, while the first row only reads
  // columns 0..end-start (is_overlap initialises just end-start+1 cells).
  // Confirm dp is wide enough for `end`.
  for(j = 0; j <= end; j++) {
    align->dp[0][j].score = 0;
    align->dp[0][j].len1 = 0;
    align->dp[0][j].len2 = 0;
  }

  // Smallest score seen in the row currently being filled.
  int min;

  for(i = 0; i < m1; i++) {
    // Left edge of the band: unreachable sentinel. INT_MAX-1 leaves room for
    // the "+1" in the comparisons below without overflowing.
    align->dp[1][i].score = INT_MAX-1;
    align->dp[1][i].len1 = 0;
    align->dp[1][i].len2 = 0;
    min = INT_MAX;
    // Fill cells j+1 of the band for row i. The band is the diagonal strip
    // starting at column i and end-start columns wide, cut off at m2.
    // NOTE(review): toupper() receives a plain char; cast to unsigned char if
    // bytes >= 0x80 can occur in the input.
    for(j = i; j < MIN(m2, end-start+i); j++) {
      if (toupper(align->buf1[i]) == 'N' || toupper(align->buf2[start+j]) == 'N' ||
          toupper(align->buf1[i]) == toupper(align->buf2[start+j])) {
        // Characters match (or one is the wildcard N).
        if (align->dp[0][j].score < align->dp[0][j+1].score+1 && align->dp[0][j].score < align->dp[1][j].score+1) {
          // Diagonal step at no cost. Wildcard matches do not add to the
          // path lengths; real matches add one to both.
          align->dp[1][j+1].score = align->dp[0][j].score;
          if (toupper(align->buf1[i]) == 'N' || toupper(align->buf2[start+j]) == 'N') {
            align->dp[1][j+1].len1 = align->dp[0][j].len1;
            align->dp[1][j+1].len2 = align->dp[0][j].len2;
          } else {
            align->dp[1][j+1].len1 = align->dp[0][j].len1+1;
            align->dp[1][j+1].len2 = align->dp[0][j].len2+1;
          }
        } else if (align->dp[0][j+1].score+1 < align->dp[1][j].score+1) {
          // Step from the row above: consumes a contig1 character only.
          align->dp[1][j+1].score = align->dp[0][j+1].score + 1;
          align->dp[1][j+1].len1 = align->dp[0][j+1].len1+1;
          align->dp[1][j+1].len2 = align->dp[0][j+1].len2;
        } else {
          // Step from the left: consumes a contig2 character only.
          align->dp[1][j+1].score = align->dp[1][j].score + 1;
          align->dp[1][j+1].len1 = align->dp[1][j].len1;
          align->dp[1][j+1].len2 = align->dp[1][j].len2+1;
        }
      } else {
        // Mismatch: every predecessor costs one.
        // NOTE(review): the second test compares score (not score+1) with the
        // left neighbour, so a tie between diagonal and left resolves to the
        // diagonal here; the resulting score is the same either way.
        if (align->dp[0][j].score + 1 < align->dp[0][j+1].score+1 && align->dp[0][j].score < align->dp[1][j].score+1) {
          align->dp[1][j+1].score = align->dp[0][j].score + 1;
          align->dp[1][j+1].len1 = align->dp[0][j].len1+1;
          align->dp[1][j+1].len2 = align->dp[0][j].len2+1;
        } else if (align->dp[0][j+1].score+1 < align->dp[1][j].score+1) {
          align->dp[1][j+1].score = align->dp[0][j+1].score + 1;
          align->dp[1][j+1].len1 = align->dp[0][j+1].len1+1;
          align->dp[1][j+1].len2 = align->dp[0][j+1].len2;
        } else {
          align->dp[1][j+1].score = align->dp[1][j].score + 1;
          align->dp[1][j+1].len1 = align->dp[1][j].len1;
          align->dp[1][j+1].len2 = align->dp[1][j].len2+1;
        }
      }
      if (align->dp[1][j+1].score < min)
        min = align->dp[1][j+1].score;
    }
    // The finished row becomes the "previous" row for the next iteration.
    for(j = i; j <= MIN(m2, end-start+i); j++) {
      align->dp[0][j] = align->dp[1][j];
    }
    // Right edge of the band: the next row reads one column further, so seed
    // that cell with the unreachable sentinel.
    if (end-start+i < m2) {
      align->dp[0][end-start+i+1].score = INT_MAX-1;
      align->dp[0][end-start+i+1].len1 = 0;
      align->dp[0][end-start+i+1].len2 = 0;
    }

    // Give up once even the best cell of this row is over the error budget.
    // (min stays INT_MAX when the band was empty, which also ends here.)
    if (min > max_err*m1)
      return 0;
  }

  // All of contig1 consumed: accept if any end column is within the error
  // rate relative to the shorter of the two path lengths.
  for(j = m1; j <= MIN(m2, end-start+m1); j++) {
    if (align->dp[0][j].score <= max_err*MIN(align->dp[0][j].len1, align->dp[0][j].len2)) {
      return 1;
    }
  }

  return 0;
}

// is_overlap() only accepts a candidate overlap after at least this many
// characters of contig2 have been consumed.
#define MIN_OVERLAP_LEN 20

// Does the end of contig1 overlap with the beginning of contig2?
//
// Banded edit-distance test between a suffix of contig1 and a prefix of
// contig2. The overlap may begin at any contig1 position in [start, end] and
// must run to the end of contig1. 'N' / 'n' in either sequence matches any
// character.
//
// Parameters:
//   contig1, contig2   NUL-terminated sequences (copied with strcpy /
//                      reverse_base into the workspace buffers).
//   forward1, forward2 when zero, the reverse complement of that contig is
//                      used instead of the contig itself.
//   len1, len2         lengths used to size the DP; presumably strlen() of the
//                      respective contig -- confirm against callers.
//   start, end         allowed range for the overlap start in contig1;
//                      clamped to [0, len1].
//   align              scratch workspace providing buf1, buf2 and the two DP
//                      rows dp[0] (previous row) and dp[1] (current row).
//   max_err            allowed error rate; values outside [0, 1) are replaced
//                      by MAX_ERR.
//
// Returns the number of contig2 characters in the best accepted overlap
// (always >= MIN_OVERLAP_LEN), or 0 when no overlap qualifies.
int is_overlap(char *contig1, int forward1, int len1,
               char *contig2, int forward2, int len2,
               int start, int end, align_t *align, double max_err) {
  int i,j;
  int m1, m2;

  // Out-of-range error rates fall back to the project default.
  if (max_err < 0.0 || max_err >= 1.0)
    max_err = MAX_ERR;

  // Load both sequences into the workspace in the requested orientation.
  // NOTE(review): no bounds check here; buf1/buf2 must be able to hold the
  // contigs -- their sizes are declared in align.h, not visible in this file.
  if (!forward1) {
    reverse_base(contig1, align->buf1);
  } else {
    strcpy(align->buf1, contig1);
  }

  if (!forward2) {
    reverse_base(contig2, align->buf2);
  } else {
    strcpy(align->buf2, contig2);
  }

  if (start < 0)
    start = 0;
  if (end > len1)
    end = len1;

  // initialize dynamic programming array
  // m1: length of the contig1 suffix beginning at `start`; DP column m1 is
  //     the end of contig1.
  // m2: number of contig2 characters (rows) considered: the suffix length
  //     stretched by the error rate, cut off at len2.
  m1 = len1-start;
  m2 = (1.0 + max_err)*m1;
  if (m2 > len2)
    m2 = len2;
  // NOTE(review): start was clamped to >= 0 above, so m1 <= len1 already
  // holds and this branch looks unreachable.
  if (m1 > len1)
    m1 = len1;

  // Row 0: the overlap may begin at any of these contig1 columns at no cost.
  for(i = 0; i <= end-start; i++) {
    align->dp[0][i].score = 0;
    align->dp[0][i].len1 = 0;
    align->dp[0][i].len2 = 0;
    // printf("Init %d\n", i);
  }

  // printf("---\n");

  // Best (lowest) smoothed error ratio accepted so far, and the contig2
  // length (row count) at which it was found.
  double min = 1.0;
  int min_j = -1;
  // Smallest score seen in the row currently being filled.
  int min_value;

  // One row per contig2 character j; columns i run over the contig1 suffix.
  for(j = 0; j < m2; j++) {
    // Left edge of the band: unreachable sentinel. INT_MAX-1 leaves room for
    // the "+1" in the comparisons below without overflowing.
    align->dp[1][j].score = INT_MAX-1;
    align->dp[1][j].len1 = m1;
    align->dp[1][j].len2 = m2;
    // printf("Init %d\n", j);
    min_value = INT_MAX;
    // Fill cells i+1 of the band for row j. The band is the diagonal strip
    // starting at column j and end-start columns wide, cut off at m1.
    // NOTE(review): toupper() receives a plain char; cast to unsigned char if
    // bytes >= 0x80 can occur in the input.
    // NOTE(review): the len1/len2 bookkeeping mirrors is_contained, where rows
    // are contig1. Here rows are contig2, so len1 grows on steps that consume
    // a contig2 character and len2 on steps that consume a contig1 character.
    // Only MIN(len1, len2) is ever read, so the result is unaffected.
    for(i = j; i < MIN(m1, (end-start)+j); i++) {
      // printf("Set %d\n", i+1);
      if (toupper(align->buf1[start+i]) == 'N' || toupper(align->buf2[j]) == 'N' ||
          toupper(align->buf1[start+i]) == toupper(align->buf2[j])) {
        // Characters match (or one is the wildcard N).
        if (align->dp[0][i].score < align->dp[0][i+1].score+1 && align->dp[0][i].score < align->dp[1][i].score+1) {
          // Diagonal step at no cost. Wildcard matches do not add to the
          // path lengths; real matches add one to both.
          align->dp[1][i+1].score = align->dp[0][i].score;
          if (toupper(align->buf1[start+i]) == 'N' || toupper(align->buf2[j]) == 'N') {
            align->dp[1][i+1].len1 = align->dp[0][i].len1;
            align->dp[1][i+1].len2 = align->dp[0][i].len2;
          } else {
            align->dp[1][i+1].len1 = align->dp[0][i].len1+1;
            align->dp[1][i+1].len2 = align->dp[0][i].len2+1;
          }
        } else if (align->dp[0][i+1].score+1 <  align->dp[1][i].score+1) {
          // Step from the row above (same column): costs one.
          align->dp[1][i+1].score = align->dp[0][i+1].score + 1;
          align->dp[1][i+1].len1 = align->dp[0][i+1].len1+1;
          align->dp[1][i+1].len2 = align->dp[0][i+1].len2;
        } else {
          // Step from the left (same row): costs one.
          align->dp[1][i+1].score = align->dp[1][i].score + 1;
          align->dp[1][i+1].len1 = align->dp[1][i].len1;
          align->dp[1][i+1].len2 = align->dp[1][i].len2+1;
        }
      } else {
        // Mismatch: every predecessor costs one.
        if (align->dp[0][i].score + 1 < align->dp[0][i+1].score+1 && align->dp[0][i].score < align->dp[1][i].score+1) {
          align->dp[1][i+1].score = align->dp[0][i].score + 1;
          align->dp[1][i+1].len1 = align->dp[0][i].len1+1;
          align->dp[1][i+1].len2 = align->dp[0][i].len2+1;
        } else if (align->dp[0][i+1].score+1 <  align->dp[1][i].score+1) {
          align->dp[1][i+1].score = align->dp[0][i+1].score + 1;
          align->dp[1][i+1].len1 = align->dp[0][i+1].len1+1;
          align->dp[1][i+1].len2 = align->dp[0][i+1].len2;
        } else {
          align->dp[1][i+1].score = align->dp[1][i].score + 1;
          align->dp[1][i+1].len1 = align->dp[1][i].len1;
          align->dp[1][i+1].len2 = align->dp[1][i].len2+1;
        }
      }
      if (align->dp[1][i+1].score < min_value)
        min_value = align->dp[1][i+1].score;
    }
    // The finished row becomes the "previous" row for the next iteration.
    for(i = j; i <= MIN(m1, end-start+j); i++) {
      align->dp[0][i] = align->dp[1][i];
      // printf("Copy %d\n", i);
    }

    // Right edge of the band: the next row reads one column further, so seed
    // that cell with the unreachable sentinel.
    if (end-start+j <= m1) {
      align->dp[0][end-start+j+1].score = INT_MAX-1;
      align->dp[0][end-start+j+1].len1 = m1;
      align->dp[0][end-start+j+1].len2 = m2;
      // printf("Init %d\n", end-start+j+1);
    }

    // Candidate overlap using the first j+1 characters of contig2. It needs
    // enough contig2 characters, a band that has reached column m1 (the end
    // of contig1), no more rows than suffix columns, and a score within the
    // error rate at column m1.
    if (j+1 >= MIN_OVERLAP_LEN && end-start+j >= m1 && 
        j+1 <= m1 &&
        align->dp[0][m1].score <= max_err*MIN(align->dp[0][m1].len1, align->dp[0][m1].len2)) {
      // Rank candidates by (score+1)/(length+1). The strict '<' keeps the
      // earliest (shortest) candidate when ratios tie.
      if ((double)(align->dp[0][m1].score+1)/(double)(MIN(align->dp[0][m1].len1, align->dp[0][m1].len2)+1) < min) {
        min = (double)(align->dp[0][m1].score+1)/(double)(MIN(align->dp[0][m1].len1, align->dp[0][m1].len2)+1);
        min_j = j+1;
      }
    }

    // Stop once even the best cell of this row is over the error budget.
    if ((double)min_value / (double)(MIN(m1,m2)) > max_err)
      break;
  }

  if (min_j > 0) {

    return min_j;
  }

  return 0;
}

#ifdef TEST

int main() {
  char str1[40960];
  char str2[40960];
  align_sw_t *align;

  strcpy(str2, "atgatagatagatcgtgatcgatggtagatcgactgattgtgtgtttcgtcgat");
  strcpy(str1,          "agatcgtgatcgatggtagatcgactgattgtgtgttt");

  strcpy(str2,                "atgatagatagatcgtgatcgatggtagatcgactgattgtgtgtttcgtcgat");
  strcpy(str1, "aatgtagatgatatgatgatagatagatcgtgatcgatggtagatcgactgat");

  align = init_align();

  printf("%s in %s: %d\n", str1, str2,
         is_overlap(str1, 1, strlen(str1),
                    str2, 1, strlen(str2), 0, 1000, align, 3.0));
  return 0;
}

#endif
