#include <cstdlib>
#include <fstream>
#include <iostream>


typedef unsigned int uint;
const uint MEGABYTE = 1048576;

const char letters[4] = { 'A', 'C', 'G', 'T' };


int
main(int argc, char** argv)
{
  std::cout << "DNA sequence mutator" << std::endl;
  std::cout << std::endl;
  if(argc < 6)
  {
    std::cout << "Usage: mutator input output base repetitions rate [seed]" << std::endl;
    std::cout << "    input          input file" << std::endl;
    std::cout << "    output         output file" << std::endl;
    std::cout << "    base           base sequence length (MB)" << std::endl;
    std::cout << "    repetitions    number of repetitions" << std::endl;
    std::cout << "    rate           mutation rate" << std::endl;
    std::cout << "    seed           random seed (optional; default 0xDEADBEEF)" << std::endl;
    std::cout << std::endl;
    return 1;
  }

  std::cout << "Input: " << argv[1] << std::endl;
  std::ifstream input(argv[1], std::ios_base::binary);
  if(!input)
  {
    std::cerr << "Error opening input file!" << std::endl;
    return 2;
  }
  std::cout << "Output: " << argv[2] << std::endl;
  std::ofstream output(argv[2], std::ios_base::binary);
  if(!output)
  {
    std::cerr << "Error opening output file!" << std::endl;
    return 3;
  }
  uint base = atoi(argv[3]);
  std::cout << "Base sequence length: " << base << " MB" << std::endl;
  uint repetitions = atoi(argv[4]);
  std::cout << "Number of repetitions: " << repetitions << std::endl;
  double rate = atof(argv[5]);
  std::cout << "Mutation rate: " << rate << std::endl;
  uint seed = 0xDEADBEEF;
  if(argc >= 7)
  {
    seed = atoi(argv[6]);
  }
  std::cout << "Random seed: " << seed << std::endl;

  srand(seed);
  uint mutations = 0;
  char* source = new char[base * MEGABYTE];
  char* buffer = new char[base * MEGABYTE];
  input.read(source, base * MEGABYTE);
  uint size = input.gcount();
  std::cout << "Actual base sequence length: " << size << " bytes (" << ((double)size / MEGABYTE) << " MB)" << std::endl;
  std::cout << std::endl;

  output.write(source, size);
  for(uint i = 1; i < repetitions; i++)
  {
    for(uint j = 0; j < size; j++)
    {
      if(rand() / (RAND_MAX + 1.0) < rate)
      {
        uint tmp;
        switch(source[j])
        {
          case 'A':
            tmp = rand() % 3 + 1;
            break;
          case 'C':
            tmp = rand() % 3 + 2;
            break;
          case 'G':
            tmp = rand() % 3 + 3;
            break;
          case 'T':
            tmp = rand() % 3;
            break;
          default:
            tmp = rand() & 3;
            break;
        }
        buffer[j] = letters[tmp & 3];
        mutations++;
      }
      else
      {
        buffer[j] = source[j];
      }
    }
    output.write(buffer, size);
  }

  std::cout << "Number of mutations: " << mutations << std::endl;
  std::cout << std::endl;
  return 0;
}

