/**
 * Copyright (c) 2011, Leena Salmela <leena.salmela@cs.helsinki.fi>
 * All rights reserved.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include <fstream>
#include <iostream>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

using namespace std;

// Size in bytes of the buffers holding a contig name (terminating NUL included).
#define MAX_CONTIG_NAME_LEN 1024
// Size in bytes of the buffer holding one raw input line (terminating NUL included).
#define LINE_LEN 4096

// One input line describing a pair of mapped reads, as filled in by
// read_mapping_line().
struct mapping_t {
  // Contig names of the two ends. Which record of the line lands in the "1"
  // fields and which in the "2" fields is decided by the `reverse` argument
  // of read_mapping_line().
  char contig1[MAX_CONTIG_NAME_LEN];
  char contig2[MAX_CONTIG_NAME_LEN];
  // Start positions parsed from the line for the two ends.
  int pos1;
  int pos2;
  // Orientation of the two ends: 'R' when bit 0x10 of the record's flag
  // field is set, 'F' otherwise.
  char ori1;
  char ori2;

  // 0 right after parsing; set to 1 once the filter accepts the mapping.
  int ok;

  // The unmodified text of the input line, echoed to stdout for accepted
  // mappings.
  char line[LINE_LEN];
};

// Moves past the current tab-delimited field. Returns a pointer to the first
// character of the next field, or to the terminating NUL when the line ends
// inside this field, so parsing never steps beyond the end of the string.
static char *skip_mapping_field(char *p) {
  while (*p != '\t' && *p != '\0') p++;
  if (*p == '\t') p++;
  return p;
}

// Copies the current tab-delimited field into dest (a buffer of dest_size
// bytes, dest_size >= 1), truncating names that do not fit and always
// NUL-terminating. Returns the same position skip_mapping_field() would.
static char *copy_mapping_field(char *p, char *dest, size_t dest_size) {
  size_t n = 0;
  while (*p != '\t' && *p != '\0') {
    if (n + 1 < dest_size) dest[n++] = *p;
    p++;
  }
  dest[n] = '\0';
  if (*p == '\t') p++;
  return p;
}

/**
 * Reads the next line from input and parses it into *mapping.
 *
 * The line is treated as tab-separated fields holding two records back to
 * back: field 1 is skipped (read name), field 2 is an integer flag whose bit
 * 0x10 selects orientation 'R' (otherwise 'F'), field 3 is a contig name,
 * field 4 a start position, fields 5-11 are skipped; fields 12-15 repeat the
 * name/flag/contig/position layout for the second record.
 *
 * With reverse == 0 the first record fills ori1/contig1/pos1 and the second
 * fills ori2/contig2/pos2; with reverse != 0 the roles are swapped.
 * mapping->line keeps the raw text and mapping->ok is reset to 0.
 *
 * Missing fields parse as empty (position 0, empty contig name) instead of
 * reading past the end of the line, and contig names longer than the
 * destination buffer are truncated.
 *
 * Returns 0 when a line was read, 1 when no further line is available. A read
 * failure that is not a plain end of file (stream not open, or a line that
 * does not fit in mapping->line) is reported on stderr and also returns 1,
 * so callers looping until 1 always terminate.
 */
int read_mapping_line(std::ifstream *input, mapping_t *mapping, int reverse) {
  input->getline(mapping->line, sizeof(mapping->line));

  // fail() rather than eof(): a last line without a trailing newline sets
  // eofbit but is still a valid line, while a broken stream never sets eofbit.
  if (input->fail()) {
    if (!input->eof()) {
      fprintf(stderr,
              "Warning: could not read a mapping line (stream error or line "
              "longer than %lu characters); stopping at this point\n",
              static_cast<unsigned long>(sizeof(mapping->line) - 1));
    }
    return 1;
  }

  // Destinations for the first and the second record on the line.
  char *first_contig  = reverse ? mapping->contig2 : mapping->contig1;
  char *second_contig = reverse ? mapping->contig1 : mapping->contig2;
  int  *first_pos     = reverse ? &mapping->pos2 : &mapping->pos1;
  int  *second_pos    = reverse ? &mapping->pos1 : &mapping->pos2;
  char *first_ori     = reverse ? &mapping->ori2 : &mapping->ori1;
  char *second_ori    = reverse ? &mapping->ori1 : &mapping->ori2;

  char *p = mapping->line;

  // ---- first record ----
  // Skip read name
  p = skip_mapping_field(p);

  // Orientation
  *first_ori = (atoi(p) & 0x10) ? 'R' : 'F';
  p = skip_mapping_field(p);

  // Copy contig name
  p = copy_mapping_field(p, first_contig, sizeof(mapping->contig1));

  // Start position
  *first_pos = atoi(p);

  // Skip the position field and the seven remaining fields of the first entry
  for (int skipped = 0; skipped < 8; skipped++) {
    p = skip_mapping_field(p);
  }

  // ---- second record ----
  // Skip read name
  p = skip_mapping_field(p);

  // Orientation
  *second_ori = (atoi(p) & 0x10) ? 'R' : 'F';
  p = skip_mapping_field(p);

  // Copy contig name
  p = copy_mapping_field(p, second_contig, sizeof(mapping->contig2));

  // Start position
  *second_pos = atoi(p);

  mapping->ok = 0;

  return 0;
}

// Initial capacity, in records, of each of the mF and mR buffers. main
// doubles both buffers whenever one of them fills up.
#define MAX_MAPPINGS 10

// Default window width (overridable with -w): mappings whose pos1 lies within
// this distance of the window's first mapping are judged together.
#define WINDOW_WIDTH 50

// Default for the -p option: fraction of mappings in a window that must agree
// on contig2. The filtering code also tests this macro with #ifdef, so its
// mere presence selects the majority-vote variant of the filter.
#define MAJORITY_VALUE 1.0

// Buffers holding the mappings collected for the contig currently being
// processed; allocated and resized in main. Records from the first input go
// to mF when ori1 is 'F' and to mR otherwise; records from the second input
// go to mF when ori1 is 'R' and to mR otherwise.
mapping_t *mF;
mapping_t *mR;

int compare(const void *p1, const void *p2) {
  mapping_t *m1 = (mapping_t *)p1;
  mapping_t *m2 = (mapping_t *)p2;

  int res = strcmp(m1->contig1, m2->contig1);

  if (res != 0) {
    printf("Invalid entry to sort %s %s\n", m1->contig1, m2->contig1);
    exit(1);
  }

  return m1->pos1 - m2->pos1;
}

// Prints the command-line synopsis followed by the option summary to stdout.
// prog is the program name shown in the synopsis line.
void usage(char *prog) {
  static const char *const option_help[] = {
    "Options:\n",
    "-w <int>     The width of the window for consistent mappings.\n",
    "-p <float>   Fraction of agreeing mappings considered consistent\n",
  };

  printf("Usage: %s [options] <mappings sorted by F3 mapped contig> <mappings sorted by R3 mapped contig>\n\n", prog);
  printf("%s", "Filtered mappings are outputted to stdout.\n\n");

  for (size_t k = 0; k < sizeof(option_help) / sizeof(option_help[0]); k++) {
    printf("%s", option_help[k]);
  }
}

int main(int argc, char **argv) {
  ifstream input1;
  ifstream input2;
  mapping_t m1;
  mapping_t m2;

  int end1=0, end2=0;
  int iF,iR;
  char current_contig[MAX_CONTIG_NAME_LEN];

  int w = WINDOW_WIDTH;
  double perc = MAJORITY_VALUE;
  char *prog = argv[0];

  int max_mappings = MAX_MAPPINGS;

  mF = new mapping_t[max_mappings];
  mR = new mapping_t[max_mappings];

  while(argc > 2) {
    if (!strcmp(argv[0], "-w")) {
      w = atoi(argv[1]);
      argv++;
      argc--;
    } else if (!strcmp(argv[0], "-p")) {
      perc = atof(argv[1]);
      argv++;
      argc--;
    }

    argv++;
    argc--;
  }

  if (argc == 2) {
    input1.open(argv[0]);
    input2.open(argv[1]);
  } else {
    usage(prog);
    abort();
  }

  end1 = read_mapping_line(&input1, &m1, 0);
  end2 = read_mapping_line(&input2, &m2, 1);

  while(!end1 || !end2) {
    if ((!end1 && (strcmp(m1.contig1, m2.contig1) < 0)) || end2)
      strcpy(current_contig, m1.contig1);
    else
      strcpy(current_contig, m2.contig1);
    
    iF = iR = 0;
    while((strcmp(m1.contig1, current_contig) == 0) && !end1) {
      if (strcmp(m1.contig1, m1.contig2) != 0) {
        if (m1.ori1 == 'F') {
          strcpy(mF[iF].contig1, m1.contig1);
          strcpy(mF[iF].contig2, m1.contig2);
          mF[iF].pos1 =  m1.pos1;
          mF[iF].pos2 =  m1.pos2;
          mF[iF].ori1 =  m1.ori1;
          mF[iF].ori2 =  m1.ori2;
          mF[iF].ok =  m1.ok;
          strcpy(mF[iF].line, m1.line);
          iF++;
        } else {
          strcpy(mR[iR].contig1, m1.contig1);
          strcpy(mR[iR].contig2, m1.contig2);
          mR[iR].pos1 =  m1.pos1;
          mR[iR].pos2 =  m1.pos2;
          mR[iR].ori1 =  m1.ori1;
          mR[iR].ori2 =  m1.ori2;
          mR[iR].ok =  m1.ok;
          strcpy(mR[iR].line, m1.line);
          iR++;
        }
      }

      if (iR >= max_mappings || iF >= max_mappings) {
        mapping_t *temp;
        int i;

        temp = new mapping_t[2*max_mappings];
        for(i = 0; i < iR; i++) {
          temp[i] = mR[i];
        }
        delete [] mR;
        mR = temp;

        temp = new mapping_t[2*max_mappings];
        for(i = 0; i < iF; i++) {
          temp[i] = mF[i];
        }
        delete [] mF;
        mF = temp;

        max_mappings = 2*max_mappings;

        // fprintf(stderr, "Resized %d\n", max_mappings);

        // printf("Too many mappings %s\n", current_contig);
        // exit(1);
      }

      end1 = read_mapping_line(&input1, &m1, 0);
    }

    while((strcmp(m2.contig1, current_contig) == 0) && !end2) {
      if (strcmp(m2.contig1,  m2.contig2) != 0) {
        if (m2.ori1 == 'R') {
          strcpy(mF[iF].contig1, m2.contig1);
          strcpy(mF[iF].contig2, m2.contig2);
          mF[iF].pos1 =  m2.pos1;
          mF[iF].pos2 =  m2.pos2;
          mF[iF].ori1 =  m2.ori1;
          mF[iF].ori2 =  m2.ori2;
          mF[iF].ok =  m2.ok;
          strcpy(mF[iF].line, m2.line);
          iF++;
        } else {
          strcpy(mR[iR].contig1, m2.contig1);
          strcpy(mR[iR].contig2, m2.contig2);
          mR[iR].pos1 =  m2.pos1;
          mR[iR].pos2 =  m2.pos2;
          mR[iR].ori1 =  m2.ori1;
          mR[iR].ori2 =  m2.ori2;
          mR[iR].ok =  m2.ok;
          strcpy(mR[iR].line, m2.line);
          iR++;
        }
      }

      if (iR >= max_mappings || iF >= max_mappings) {
        mapping_t *temp;
        int i;

        temp = new mapping_t[2*max_mappings];
        for(i = 0; i < iR; i++) {
          temp[i] = mR[i];
        }
        delete [] mR;
        mR = temp;

        temp = new mapping_t[2*max_mappings];
        for(i = 0; i < iF; i++) {
          temp[i] = mF[i];
        }
        delete [] mF;
        mF = temp;

        max_mappings = 2*max_mappings;

        // fprintf(stderr, "Resized %d\n", max_mappings);
        // printf("Too many mappings %s\n", current_contig);
        // exit(1);
      }

      end2 = read_mapping_line(&input2, &m2, 1);
    }

     // fprintf(stderr, "%d + %d mappings\n", iF, iR);

    if (iF > 0) {

      // int i;
      // for(i = 0; i < iF; i++) {
      //   fprintf(stderr, "%s %d %s %d\n", mF[i].contig1, mF[i].pos1, 
      //           mF[i].contig2, mF[i].pos2);
      // }

      qsort(mF, iF, sizeof(mapping_t), compare);

      // fprintf(stderr,"Sorted:\n");
      // for(i = 0; i < iF; i++) {
      //   fprintf(stderr, "%s %d %s %d\n", mF[i].contig1, mF[i].pos1, 
      //           mF[i].contig2, mF[i].pos2);
      // }

      // fprintf(stderr, "done\n");

      int start = 0;
      int end = start;

      while(end < iF) {
        while(mF[end].pos1 < mF[start].pos1 + w && end < iF) {
          end++;
        }
#ifdef MAJORITY_VALUE
        char *guess = NULL;
        int counter = 0;
        for(int i = start; i < end; i++) {
          if (counter == 0) {
            guess = mF[i].contig2;
            counter = 1;
          } else if (strcmp(mF[i].contig2, guess) == 0) {
            counter++;
          } else {
            counter--;
          }
        }

        counter = 0;
        for(int i = start; i < end; i++) {
          if (strcmp(mF[i].contig2, guess) == 0)
            counter++;
        }

        if (counter >= (end-start)*perc) {
          // Majority found
          for(int i = start; i < end; i++) {
            if (strcmp(mF[i].contig2, guess) == 0) {
              mF[i].ok = 1;
            }
          }
        } else {
          // for(int i = start; i < end; i++) {
          //   mF[i].ok = 0;
          // }
        }
#else
        int ok = 1;
        for(int i = start+1; i < end; i++) {
          if (strcmp(mF[i-1].contig2, mF[i].contig2) != 0) {
            ok = 0;
            break;
          }
        }
        if (ok) {
          for(int i = start; i < end; i++) {
            mF[i].ok = 1;
          }
        }
#endif
        start++;
      }
    }

    for(int i = 0; i < iF; i++) {
      if (mF[i].ok) {
        printf("%s\n", mF[i].line);
      }
    }

    if (iR > 0) {

      // int i;
      // for(i = 0; i < iR; i++) {
      //   fprintf(stderr, "%s %d %s %d\n", mR[i].contig1, mR[i].pos1, 
      //           mR[i].contig2, mR[i].pos2);
      // }

      qsort(mR, iR, sizeof(mapping_t), compare);

      // fprintf(stderr, "Sorted:\n");
      // for(i = 0; i < iR; i++) {
      //   fprintf(stderr, "%s %d %s %d\n", mR[i].contig1, mR[i].pos1, 
      //           mR[i].contig2, mR[i].pos2);
      // }

      // fprintf(stderr, "done\n");

      int start = 0;
      int end = start;

      while(end < iR) {
        while(mR[end].pos1 < mR[start].pos1 + w && end < iR) {
          end++;
        }
#ifdef MAJORITY_VALUE
        char *guess = NULL;
        int counter = 0;
        for(int i = start; i < end; i++) {
          if (counter == 0) {
            guess = mR[i].contig2;
            counter = 1;
          } else if (strcmp(mR[i].contig2, guess) == 0) {
            counter++;
          } else {
            counter--;
          }
        }

        counter = 0;
        for(int i = start; i < end; i++) {
          if (strcmp(mR[i].contig2, guess) == 0)
            counter++;
        }

        if (counter >= (end-start)*perc) {
          // Majority found
          for(int i = start; i < end; i++) {
            if (strcmp(mR[i].contig2, guess) == 0)
              mR[i].ok = 1;
          }
        } else {
          // for(int i = start; i < end; i++) {
          //   mR[i].ok = 0;
          // }
        }
#else
        int ok = 1;
        for(int i = start+1; i < end; i++) {
          if (strcmp(mR[i-1].contig2, mR[i].contig2) != 0) {
            ok = 0;
            break;
          }
        }
        if (ok) {
          for(int i = start; i < end; i++) {
            mR[i].ok = 1;
          }
        }
#endif
        start++;
      }
    }

    for(int i = 0; i < iR; i++) {
      if (mR[i].ok) {
        printf("%s\n", mR[i].line);
      }
    }

  }

  input1.close();
  input2.close();

  return 0;
}
