// --*- C++ -*------x---------------------------------------------------------
// $Id: histogram.cc,v 1.2 2007/08/14 18:27:51 bindewae Exp $
//
// Program:         histogram
//
// Author:          Eckart Bindewald
//
// Project name:    Bayesfold
//
// Date:            $Date: 2007/08/14 18:27:51 $
//
// Description:     Compute histogram from input data. See help output.
// 
// -----------------x-------------------x-------------------x-----------------

#include <iostream>
#include <string>
#include <debug.h>
#include <Vec.h>
#include <StringTools.h>
#include <GetArg.h>
#include <vectornumerics.h>

// Writes a short usage summary of the histogram program to the given stream.
//
// The option list mirrors the keys parsed in main(): the field selector is
// given as "-f value" (matching the corresponding error message), and the
// pseudo count option and normalization mode 2 are listed as well.
//
// @param os  output stream that receives the help text
void
helpOutput(std::ostream& os)
{
  os << "Compute histogram from data stream. Usage: " << std::endl;
  os << "histogram [--bins value][--delta value][-f value][--line 0|1]"
     << "[--min value][--normalize 0|1|2][--pseudo value][--sum 0|1]"
     << "[--vector][--verbose value] < inputdata" << std::endl;
  os << "Data is in gnuplot-like format (default) or vector format (--vector)"
     << std::endl;
}

int
main(int argc, char ** argv)
{
  int normalizeMode = 0;
  bool vectorMode = false;
  unsigned int field = 1;
  unsigned int lineMode = 0;
  unsigned int numBins  = 100; // number of bins
  double pseudoCount = 0.0;
  unsigned int sumMode = 0;
  unsigned int verboseLevel = 1;

  string line;
  // read data
  Vec<double> data;
  double min = 0.0; // minimum value counted in bins
  double delta = 1.0; // breadth of bins

  getArg("f", field, argc, argv, field);
  getArg("-min", min, argc, argv, min);
  getArg("-delta", delta, argc, argv, delta);
  getArg("-bins", numBins, argc, argv, numBins);
  getArg("-line", lineMode, argc, argv, lineMode);
  getArg("-normalize", normalizeMode, argc, argv, normalizeMode);
  getArg("-pseudo", pseudoCount, argc, argv, pseudoCount);
  getArg("-sum", sumMode, argc, argv, sumMode);
  getArg("-vector", vectorMode, argc, argv);
  getArg("-verbose", verboseLevel, argc, argv, verboseLevel);

  if (verboseLevel > 0) {
    helpOutput(cout);
  }

  ERROR_IF(delta<= 0.0, "Delta value (breadth of bins) must be greater zero. ");

  ERROR_IF(field == 0, "Field descriptor (-f value) must be great zero!" );
  --field;  // internal counting starts at zero
  if (verboseLevel > 1) {
    cout << "Reading input data...";
  }

  if (vectorMode) {
    cin >> data;
  }
  else {
    while (cin) {
      line = getLine(cin);
      Vec<string> words = getTokens(line);
      if (words.size() > field) {
	data.push_back(stod(words[field]));
      }
    }
  }
  if (verboseLevel > 1) {
    cout << "done." << endl;
    cout << data.size() << " values read." << endl;
  }
  if (verboseLevel > 2) {
    cout << "Read data:" << endl;
    outList(cout,data);
    cout << endl;
  }  

  Vec<unsigned int> result = computeHistogram(data, min, delta, numBins);
  Vec<double> resultDouble(result.size());
  for (unsigned int i = 0; i < resultDouble.size(); ++i) {
    resultDouble[i] = result[i];
  }
  if (pseudoCount > 0) {
    for (unsigned int i = 0; i < result.size(); ++i) {
      result[i] += static_cast<unsigned int>(pseudoCount);
      resultDouble[i] += pseudoCount;
    }
  }
  if (sumMode) {
    result = accumulation(result); // accumulative histogram (integral)
  }

  if (vectorMode) {
    cout << result.size() << endl;
  }
  double x = min;
  double norm = 0.0;
  Vec<double> v(result.size());
  switch (normalizeMode) {
  case 0:
    for (unsigned int i = 0; i < result.size(); ++i) {
      if (lineMode && (i > 0) && (result[i] == result[i-1])) {
        x += delta;
        continue;
      }
      cout << x << "\t" << result[i] << endl;
      x += delta;
    }
    break;
  case 1:  

    norm = 1.0 / static_cast<double>(data.size());
    for (unsigned int i = 0; i < v.size(); ++i) {
      v[i] = norm * resultDouble[i];
    }

    for (unsigned int i = 0; i < v.size(); ++i) {
      if (lineMode && (i > 0) && (result[i] == result[i-1])) {
        x += delta;
        continue;
      }
      cout << x << "\t" << v[i] << endl;
      x += delta;
    }
    break;
  case 2:
    for (unsigned int i = 0; i < v.size(); ++i) {
      norm += resultDouble[i];
    }
    if (verboseLevel > 0) {
      cerr << "Sum is : " << norm << endl;
    }
    norm = 1.0 / norm;
    if (verboseLevel > 0) {
      cerr << "Norm is: " << norm << endl;
    }
    for (unsigned int i = 0; i < v.size(); ++i) {
      v[i] = norm * resultDouble[i];
    }

    for (unsigned int i = 0; i < v.size(); ++i) {
      if (lineMode && (i > 0) && (result[i] == result[i-1])) {
        x += delta;
        continue;
      }
      cout << x << "\t" << v[i] << endl;
      x += delta;
    }
    break;
  default: ERROR("Unknown normalization mode!");
  }

  return 0;
}
