/**
 * Copyright (c) 2011, Leena Salmela <leena.salmela@cs.helsinki.fi>
 * All rights reserved.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <vector>

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <math.h>

using namespace std;

#include "io.h"
#include "align.h"

/**
 * Placement of one contig inside a scaffold, as printed by main().
 *
 * main() fills start/end so that end < start when the stored contig position
 * is not positive; compare_contig() therefore orders records by the smaller
 * of the two coordinates.
 */
struct contig_t {
  int contig;   /* contig identifier (used as an array index by main()) */
  long start;   /* first coordinate of the contig in the scaffold */
  long end;     /* second coordinate; may be smaller than start */
};

int compare_contig(const void *p1, const void *p2) {
  contig_t *c1 = (contig_t *) p1;
  contig_t *c2 = (contig_t *) p2;

  long pos1, pos2;

  pos1 = (c1->start < c1->end) ? c1->start : c1->end;
  pos2 = (c2->start < c2->end) ? c2->start : c2->end;

  if (pos1 - pos2 < 0)
    return -1;
  else if (pos1 - pos2 > 0)
    return 1;
  else
    return 0;
}

/**
 * qsort() comparator for arrays of long, ascending order.
 *
 * @param p1 pointer to the first long
 * @param p2 pointer to the second long
 * @return -1, 0 or 1 when *p1 is smaller than, equal to, or larger than *p2
 */
int compare_long(const void *p1, const void *p2) {
  const long i1 = *static_cast<const long *>(p1);
  const long i2 = *static_cast<const long *>(p2);

  /* Compare directly instead of testing the sign of i1 - i2: the
   * subtraction overflows (undefined behaviour) for values such as
   * LONG_MAX and -1, which would yield the wrong ordering. */
  if (i1 < i2)
    return -1;
  if (i1 > i2)
    return 1;
  return 0;
}

/**
 * Print the command-line synopsis and the list of options to stdout.
 *
 * @param prog program name shown in the synopsis line
 */
void usage(char *prog) {
  /* Fixed help text that follows the synopsis, one entry per output line. */
  static const char *const help_lines[] = {
    "Scaffold specifications are outputted to stdout.\n\n",
    "Options:\n",
    "-n <int>    Upper bound for genome length\n",
    "-o <int>    Maximum length of overlap to accept without checking\n",
    "            for sequence similarity.\n",
    "-e <float>  Maximum error allowed when aligning contigs\n"
  };
  const size_t num_lines = sizeof(help_lines) / sizeof(help_lines[0]);

  printf("Usage: %s [options] <config> <lp solutions> <contigs>\n\n", prog);

  for (size_t k = 0; k < num_lines; k++) {
    fputs(help_lines[k], stdout);
  }
}

/* Sentinel for "not set": main() initialises max_overlap to this value and,
 * if it is still unchanged after option parsing, derives a default from the
 * library configuration file instead. */
#define INVALID -1

/**
 * Merge the scaffolds found in a sequence of LP/MIP solutions and print the
 * resulting scaffold specifications to stdout.
 *
 * Command line: [options] <config> <lp solutions> <contigs>
 *   -o <int>    overlap length accepted without sequence checking
 *   -n <int>    upper bound for genome length
 *   -e <float>  maximum alignment error
 *
 * Position convention used throughout (see the interval computations below):
 * a positive position p means the contig spans [p, p + length]; a negative
 * position p means it spans [-p - length, -p].
 * NOTE(review): the sign presumably encodes orientation; confirm against
 * read_scaffolds().
 *
 * @return 0 on success; exits with status 2 if the solution file cannot be
 *         opened and aborts after printing usage on a malformed command line.
 */
int main(int argc, char *argv[]) {
  int num_scaffolds;
  set<int> *temp_scaffolds;
  map<int,long> *contig2temppos;

  /* Global state accumulated over all solutions: scaffold id and signed
   * position of every placed contig, and the member set of each scaffold
   * (NULL once a scaffold has been merged into another one). */
  map<int,int> contig2scaffold;
  map<int,long> contig2position;
  map<int,set<int> *> scaffolds;

  int *contig_length;
  char **contig_seq;
  char **contig_names;
  map<string,int> contig2id;
  int num_contigs;

  int max_overlap = INVALID;
  long genome_length = 100000000;

  char *prog = argv[0];
  char *contig_file;
  char *solution_file;
  char *config;

  align_t *align = NULL;

  double max_align_err = -1.0;

  /* Consume leading arguments until exactly the three positional ones are
   * left. The first iteration sees argv[0] (the program name), which matches
   * no option and is simply skipped. */
  while(argc > 3) {
    if (!strcmp(argv[0], "-o")) {
      max_overlap = atoi(argv[1]);
      argv++;
      argc--;
    } else if (!strcmp(argv[0], "-n")) {
      /* genome_length is a long: parse with atol so that values above
       * INT_MAX are not truncated. */
      genome_length = atol(argv[1]);
      argv++;
      argc--;
    } else if (!strcmp(argv[0], "-e")) {
      max_align_err = atof(argv[1]);
      argv++;
      argc--;
    }

    argv++;
    argc--;
  }

  if (argc != 3) {
    usage(prog);
    abort();
  }

  contig_file = argv[2];
  solution_file = argv[1];
  config = argv[0];

  /* No -o given: derive the unchecked overlap limit from the libraries in
   * the config file as the largest 10*sqrt(max - min) over all libraries. */
  if (max_overlap == INVALID) {
    int insert[MAX_LIBRARIES];
    int min[MAX_LIBRARIES];
    int max[MAX_LIBRARIES];
    char *files[MAX_LIBRARIES];
    int libs;
    int i;

    libs = read_config_file(config, files, insert, min, max);
    for(i = 0; i < libs; i++) {
      int t = 10*sqrt(max[i]-min[i]);
      if (t > max_overlap)
        max_overlap = t;
    }
  }

  printf("Allowing overlaps of length %d without checking\n", max_overlap);

  align = init_align();

  num_contigs = read_contigs(contig_file, &contig_length, &contig_seq, 
                             &contig_names, &contig2id);

  printf("Read contigs\n");

  ifstream solution;

  /* Next scaffold id to hand out; ids start at 1 so that the default map
   * value 0 in contig2scaffold means "not placed yet". */
  int count = 1;

  set<int> *sc_new;

  /* Split coordinates collected for the scaffold currently being built.
   * Kept in a growable vector so that any number of rejected merges can be
   * recorded without overrunning a fixed-size buffer. */
  std::vector<long> boundary;

  solution.open(solution_file);

  if (!solution.is_open()) {
    printf("Could not open MIP solution file.\n");
    exit(2);
  }

  while(!solution.eof()) {
    /* NOTE(review): read_scaffolds() is expected to fill num_scaffolds,
     * temp_scaffolds (array of contig sets) and contig2temppos (positions in
     * this solution); both are released at the end of the iteration. */
    read_scaffolds(&solution, genome_length, &num_scaffolds, &temp_scaffolds,
                   &contig2temppos, contig_length, contig_seq, max_overlap, 
                   align, max_align_err);

    for(int i = 0; i < num_scaffolds; i++) {
      sc_new = new set<int>;
      boundary.clear();

      set<int>::iterator it;

      /* Pass 1: contigs not placed in any scaffold yet join the new
       * scaffold with the position given by this solution. */
      for(it = temp_scaffolds[i].begin(); it != temp_scaffolds[i].end(); it++) {
        if (contig2scaffold[*it] <= 0) {
          (*sc_new).insert(*it);
          contig2scaffold[*it] = count;
          contig2position[*it] = (*contig2temppos)[*it];
        }
      }

      /* Pass 2: contigs already owned by another scaffold link that scaffold
       * to the new one; try to merge it in. */
      for(it = temp_scaffolds[i].begin(); it != temp_scaffolds[i].end(); it++) {
        if (contig2scaffold[*it] != count) {
          int sc2merge = contig2scaffold[*it];
          long offset = 0;
          long reverse = 0;

          /* Use the shared contig *it to work out how coordinates of the
           * old scaffold map into the frame of this solution: a shift by
           * offset and, if the signs of the two positions disagree, a
           * mirroring around coordinate reverse. */
          if (contig2position[*it] > 0 && (*contig2temppos)[*it] > 0) {
            offset = (*contig2temppos)[*it] - contig2position[*it];
            reverse = 0;
          } else if (contig2position[*it] < 0 && (*contig2temppos)[*it] < 0) {
            offset = -(*contig2temppos)[*it] + contig2position[*it];
            reverse = 0;
          } else if (contig2position[*it] < 0 && (*contig2temppos)[*it] > 0) {
            offset = (*contig2temppos)[*it] + contig2position[*it];
            reverse = -contig2position[*it];
          } else if (contig2position[*it] > 0 && (*contig2temppos)[*it] < 0) {
            offset = -(*contig2temppos)[*it] - contig2position[*it];
            reverse = contig2position[*it];
          }

          /* Apply the transformation in place to every contig of the old
           * scaffold. Note that this happens even if the merge is rejected
           * below. */
          set<int>::iterator mit;
          for(mit = (*scaffolds[sc2merge]).begin(); 
              mit != (*scaffolds[sc2merge]).end(); mit++) {
            if (reverse) {
              /* Mirror around reverse and flip the sign:
               * p > 0 becomes -(2*reverse - p), p < 0 becomes 2*reverse + p. */
              if (contig2position[*mit] > 0) {
                contig2position[*mit] += 2*(reverse - contig2position[*mit]);
                contig2position[*mit] = -contig2position[*mit];
              } else {
                contig2position[*mit] = -contig2position[*mit];
                contig2position[*mit] += 2*(reverse - contig2position[*mit]);
              }
            }
            /* Shift the magnitude of the position by offset, keeping the
             * sign. */
            if (contig2position[*mit] > 0) {
              contig2position[*mit] += offset;
            } else {
              contig2position[*mit] -= offset;
            }
          }

          int overlap = 0;

          /* Check every contig of the old scaffold that is not part of this
           * solution's scaffold against every contig of the new scaffold
           * for an overlap that cannot be justified. */
          set<int>::iterator nit;
          for(mit = (*scaffolds[sc2merge]).begin(); 
              mit != (*scaffolds[sc2merge]).end(); mit++) {
            if (temp_scaffolds[i].find(*mit) != temp_scaffolds[i].end()) {
              continue;
            }

            for(nit = (*sc_new).begin(); 
                nit != (*sc_new).end(); nit++) {
              if (*mit != *nit) {
                long s1, s2, e1, e2;
                long ol;
                
                /* Intervals [s1,e1] and [s2,e2] occupied by the two contigs. */
                if (contig2position[*mit] < 0) {
                  e1 = -contig2position[*mit];
                  s1 = e1 - contig_length[*mit];
                } else {
                  s1 = contig2position[*mit];
                  e1 = s1 + contig_length[*mit];
                }

                if (contig2position[*nit] < 0) {
                  e2 = -contig2position[*nit];
                  s2 = e2 - contig_length[*nit];
                } else {
                  s2 = contig2position[*nit];
                  e2 = s2 + contig_length[*nit];
                }

                /* ol = min(e1, e2) - max(s1, s2); not positive when the
                 * intervals are disjoint. */
                ol = 0;
                if (s1 < s2) {
                  if (e1 < e2) {
                    ol = e1-s2;
                  } else {
                    ol = e2-s2;
                  }
                } else {
                  if (e1 < e2) {
                    ol = e1-s1;
                  } else {
                    ol = e2-s1;
                  }
                }
                
                /* Overlaps longer than max_overlap are accepted only if the
                 * helper confirms them.
                 * NOTE(review): is_contained()/is_overlap() presumably align
                 * the sequences within the given +-1000 offset window;
                 * confirm in align.h. */
                if (ol > max_overlap) {
                  if (s1 <= s2 && e2 <= e1) {
                    if (is_contained(contig_seq[*nit], contig2position[*nit] > 0,
                                     contig_length[*nit], contig_seq[*mit], 
                                     contig2position[*mit] > 0, contig_length[*mit], 
                                     s2-s1-1000, s2-s1+1000, align, max_align_err)) {
                      printf("Contig %d is contained in contig %d\n", *nit, *mit);
                      ol = 0;
                    } 
                  } else if (s2 <= s1 && e1 <= e2) {
                    if (is_contained(contig_seq[*mit], contig2position[*mit] > 0,
                                     contig_length[*mit], contig_seq[*nit], 
                                     contig2position[*nit] > 0, contig_length[*nit], 
                                     s1-s2-1000, s1-s2+1000, align, max_align_err)) {
                      printf("Contig %d is contained in contig %d\n", *mit, *nit);
                      ol = 0;
                    } 
                  } else if (s1 <= s2) {
                    if (is_overlap(contig_seq[*mit], contig2position[*mit] > 0,
                                   contig_length[*mit], contig_seq[*nit], 
                                   contig2position[*nit] > 0, contig_length[*nit],
                                   s2-s1-1000, s2-s1+1000, align, max_align_err)) {
                      printf("Contig %d overlaps with contig %d\n", *mit, *nit);
                      ol = 0;
                    }
                  } else {
                    if (is_overlap(contig_seq[*nit], contig2position[*nit] > 0,
                                   contig_length[*nit], contig_seq[*mit], 
                                   contig2position[*mit] > 0, contig_length[*mit],
                                   s1-s2-1000, s1-s2+1000, align, max_align_err)) {
                      printf("Contig %d overlaps with contig %d\n", *nit, *mit);
                      ol = 0;
                    }
                  }
                  
                  if (ol)
                    overlap = 1;
                }
              }
              if (overlap)
                break;
            }
            if (overlap)
              break;
          }

          if (!overlap) {
            /* Merge accepted: move all contigs of the old scaffold into the
             * new one and retire the old scaffold. */
            for(mit = (*scaffolds[sc2merge]).begin(); 
                mit != (*scaffolds[sc2merge]).end(); mit++) {
              (*sc_new).insert(*mit);
              contig2scaffold[*mit] = count;
            }
            
            delete scaffolds[sc2merge];
            scaffolds[sc2merge] = NULL;
          } else {
            /* Merge rejected: remember the midpoint of the linking contig
             * as a coordinate at which the new scaffold is split. */
            long split = contig2position[*it];
            if (split < 0) {
              split = -split - contig_length[*it]/2;
            } else {
              split += contig_length[*it]/2;
            }
            boundary.push_back(split);
          }
        }
      }

      if ((*sc_new).size() > 0) {
        if (!boundary.empty()) {
          set<int> *sc_add;
          set<int>::iterator nit;
          /* Lower end of the current slice; long so that large boundary
           * coordinates are not truncated. */
          long s = 0;

          qsort(&boundary[0], boundary.size(), sizeof(long), compare_long);

          /* Terminator so that the last slice is open-ended. */
          boundary.push_back(LONG_MAX);

          /* Cut the new scaffold into slices [s, boundary[j]) and assign
           * each contig to the slice containing its midpoint. */
          for(size_t j = 0; j < boundary.size(); j++) {
            sc_add = new set<int>;
            for(nit = (*sc_new).begin(); nit != (*sc_new).end(); nit++) {
              const long pos = contig2position[*nit];
              const long mid = (pos < 0) ? -pos - contig_length[*nit]/2
                                         : pos + contig_length[*nit]/2;
              if (mid >= s && mid < boundary[j]) {
                (*sc_add).insert(*nit);
                contig2scaffold[*nit] = count;
              }
            }
            s = boundary[j];
            
            if ((*sc_add).size() > 0) {
              scaffolds[count] = sc_add;
              count++;
            } else {
              /* Empty slices are not registered; release them. */
              delete sc_add;
            }
          }
          delete sc_new;
        } else {
          scaffolds[count] = sc_new;
          count++;
        }
      } else {
        /* Nothing was placed in the new scaffold; do not leak it. */
        delete sc_new;
      }
    }

    delete contig2temppos;
    delete [] temp_scaffolds;
      
  }
  
  printf("%d scaffolds\n", count);

  solution.close();

  /* Count the scaffolds that survived merging. */
  num_scaffolds = 0;
  for(int i = 1; i < count; i++) {
    if (scaffolds[i] != NULL) {
      num_scaffolds++;
    }
  }

  printf("%d scaffolds after merging\n", num_scaffolds);

  /* used[c] becomes 1 once contig c has been printed as part of a scaffold. */
  int *used = new int[num_contigs];
  for(int i = 0; i < num_contigs; i++) {
    used[i] = 0;
  }

  /* Print every surviving scaffold with its contigs sorted by leftmost
   * coordinate; num_scaffolds is reused as the running output number. */
  num_scaffolds = 1;
  for(int i = 1; i < count; i++) {
    set<int> *s = scaffolds[i];
    if (s != NULL) {
      contig_t *contigs = new contig_t[(*s).size()];
      set<int>::iterator it;
      int j = 0;
      for(it = (*s).begin(); it != (*s).end(); it++) {
        contigs[j].contig = *it;
        if (contig2position[*it] > 0) {
          contigs[j].start = contig2position[*it];
          contigs[j].end = contig2position[*it] + contig_length[*it];
        } else {
          /* Non-positive position: start is the right end, end < start. */
          contigs[j].start = -contig2position[*it];
          contigs[j].end = -contig2position[*it] - contig_length[*it];
        }
        j++;
      }

      qsort(contigs, (*s).size(), sizeof(contig_t), compare_contig);

      /* Scaffold length = largest coordinate minus the leftmost coordinate
       * of the first (sorted) contig. */
      long min = (contigs[0].start < contigs[0].end) ? 
        contigs[0].start : contigs[0].end;
      long max = 0;
      for(j = 0; j < (int)(*s).size(); j++) {
        if (contigs[j].start > max)
          max = contigs[j].start;
        if (contigs[j].end > max)
          max = contigs[j].end;
      }

      printf("Scaffold %d ( contigs= %d len= %ld ):\n", num_scaffolds,
             (int)(*s).size(), max-min);
      for(j = 0; j < (int)(*s).size(); j++) {
        printf("  %d:\t%ld\t%ld\n", contigs[j].contig, 
               contigs[j].start, contigs[j].end);
        used[contigs[j].contig] = 1;
      }
      num_scaffolds++;

      delete [] contigs;

      /* The scaffold has been printed and is not needed any more. */
      delete s;
      scaffolds[i] = NULL;
    }
  }

  /* Contigs that ended up in no scaffold are printed as singleton scaffolds
   * at the fixed coordinates 1000 .. 1000 + length. */
  for(int i = 0; i < num_contigs; i++) {
    if (!used[i]) {
      printf("Scaffold %d ( contigs= 1 len= %d ):\n", num_scaffolds, 
             contig_length[i]);
        printf("  %d:\t%d\t%d\n", i, 
               1000, 1000+contig_length[i]);
      num_scaffolds++;
    }
  }

  delete [] used;

  return 0;
}
