// --*- C++ -*------x---------------------------------------------------------
// $Id: KnnNetTrainer.cc,v 1.1.1.1 2006/07/03 14:43:20 bindewae Exp $
//
// Class:           KnnNetTrainer
// 
// Base class:      -
//
// Derived classes: - 
//
// Author:          Eckart Bindewald
//
// Project name:    -
//
// Date:            $Date: 2006/07/03 14:43:20 $
//
// Description:     - 
// -----------------x-------------------x-------------------x-----------------

#include <KnnNetTrainer.h>
#include <clusterAlgorithms.h>
#include <StringTools.h>
#include <vectornumerics.h>
#include <Random.h>

// ---------------------------------------------------------------------------
//                                   KnnNetTrainer
// -----------------x-------------------x-------------------x-----------------

/* CONSTRUCTORS */

/**
 * Default constructor.
 * Starts with zero known classes and verbosity level 0; members that are not
 * listed in the initializer list are default-initialized.
 */
KnnNetTrainer::KnnNetTrainer()
  : numClasses(0),
    verboseLevel(0)
{
}

/**
 * Copy constructor.
 * Delegates to copy(), which transfers the state of other into this object.
 *
 * @param other  trainer whose state is duplicated.
 */
KnnNetTrainer::KnnNetTrainer(const KnnNetTrainer& other)
{
  this->copy(other);
}

/**
 * Destructor.
 * The body is empty: no explicit cleanup is performed here.
 */
KnnNetTrainer::~KnnNetTrainer()
{
}


/* OPERATORS */

/**
 * Assignment operator.
 * Copies the state of orig into this object via copy(). Assigning an object
 * to itself is detected by address comparison and leaves it untouched.
 *
 * @param orig  trainer to copy from.
 * @return reference to this object.
 */
KnnNetTrainer&
KnnNetTrainer::operator = (const KnnNetTrainer& orig)
{
  if (this == &orig) {
    return *this; // self-assignment: nothing to copy
  }
  copy(orig);
  return *this;
}

/**
 * Output operator (placeholder).
 * Not implemented: it only reports an error through the ERROR macro and
 * writes nothing to the stream.
 *
 * @param os    output stream; handed back unchanged if ERROR returns control.
 * @param rval  trainer that would be written; currently ignored.
 * @return os
 */
ostream&
operator << (ostream& os, const KnnNetTrainer& rval)
{
  (void) rval; // not used until the operator is implemented
  ERROR("Ouput operator not yet implemented!");
  return os;
}

/**
 * Input operator (placeholder).
 * Not implemented: it only reports an error through the ERROR macro and
 * reads nothing from the stream.
 *
 * @param is    input stream; handed back unchanged if ERROR returns control.
 * @param rval  trainer that would be filled; currently left untouched.
 * @return is
 */
istream&
operator >> (istream& is, KnnNetTrainer& rval)
{
  (void) rval; // not used until the operator is implemented
  ERROR("Input operator not yet implemented!");
  return is;
}

/* PREDICATES */

/**
 * Returns copies of all data rows whose class label equals dataClass.
 * Row i of data is selected when dataClasses[i] == dataClass; the selected
 * rows keep their original relative order.
 *
 * @param dataClass  class label to select.
 * @return the matching rows (empty if no label matches).
 */
Vec<Vec<double> >
KnnNetTrainer::getData(unsigned int dataClass) const
{
  // Size the result up front from the number of matching labels.
  unsigned int numMatches = count(dataClasses.begin(), dataClasses.end(),
                                  dataClass);
  Vec<Vec<double> > rows(numMatches);
  unsigned int slot = 0; // next free position in rows
  for (unsigned int idx = 0; idx < dataClasses.size(); ++idx) {
    if (dataClasses[idx] != dataClass) {
      continue;
    }
    rows[slot] = data[idx];
    ++slot;
  }
  return rows;
}

/**
 * Returns the positions of all entries in dataClasses that equal dataClass,
 * i.e. the indices of the data rows belonging to that class, in ascending
 * order.
 *
 * @param dataClass  class label to select.
 * @return indices of the matching rows (empty if no label matches).
 */
Vec<unsigned int>
KnnNetTrainer::getDataIndices(unsigned int dataClass) const
{
  // The number of matching labels determines the size of the result.
  unsigned int numMatches = count(dataClasses.begin(), dataClasses.end(),
                                  dataClass);
  Vec<unsigned int> indices(numMatches);
  unsigned int slot = 0; // next free position in indices
  for (unsigned int row = 0; row < dataClasses.size(); ++row) {
    if (dataClasses[row] == dataClass) {
      indices[slot] = row;
      ++slot;
    }
  }
  return indices;
}

/**
 * Passes the stored data rows, together with their class labels, to
 * knnNet.predictClassProbTrain(), which receives writeFile as its output
 * stream.
 *
 * Rows are visited in storage order and none is skipped.
 *
 * @param writeFile   stream handed to predictClassProbTrain for every row.
 * @param numEntries  if greater than zero, at most this many rows are
 *                    processed; otherwise all rows are processed.
 * @param knnNet      classifier whose predictClassProbTrain is invoked.
 */
void
KnnNetTrainer::writeLevelTrainVectors(ostream& writeFile,
                                      int numEntries,
                                      const ClassifierBase& knnNet) const
{
  const bool capped = (numEntries > 0);
  for (unsigned int row = 0; row < data.size(); ++row) {
    // stop as soon as the requested number of rows has been written
    if (capped && (static_cast<int>(row) >= numEntries)) {
      break;
    }
    knnNet.predictClassProbTrain(data[row], dataClasses[row], writeFile);
  }
}

/**
 * Runs the classifier on every stored data row and compares the predicted
 * class with the stored label in dataClasses.
 *
 * For each row the first element of knnNet.predictClassProb(row) is taken
 * as a score, clamped into [0, 0.999], and turned into a predicted class:
 * 1 if the score is >= 0.5, otherwise 0. The confusion counts (tp, tn, fp,
 * fn), the value of computeMathews(tp, fp, tn, fn) and a 10-bin histogram of
 * the score are printed to cout. With verboseLevel > 0 roughly every tenth
 * misclassified row (chosen at random) is printed as well.
 *
 * @param knnNet  classifier to evaluate.
 * @return the negated number of misclassified rows, -(fn + fp), so that a
 *         larger value means fewer errors.
 */
double
KnnNetTrainer::estimateAccuracy(const ClassifierBase& knnNet) const
{
  cout << "KnnNetTrainer:: Starting estimateAccuracy!" << endl;
  Random& rnd = Random::getInstance();
  unsigned int tp = 0;
  unsigned int fp = 0;
  unsigned int tn = 0;
  unsigned int fn = 0;
  unsigned int numBins = 10;
  // Both histograms start with a pseudo count of one per bin.
  Vec<int> trueCount(numBins, 1);
  Vec<int> falseCount(numBins, 1);
  for (unsigned int i = 0; i < data.size(); ++i) {
    double classProb = knnNet.predictClassProb(data[i])[0];
    // Keep the score below 1.0 so that the bin index stays below numBins.
    if (classProb >= 1.0) {
      classProb = 0.999;
    }
    // The negated comparison also catches NaN, which would otherwise reach
    // the integer conversion below (undefined behaviour, possibly an
    // out-of-range bin index).
    if (!(classProb >= 0.0)) {
      classProb = 0.0;
    }
    const int bin = static_cast<int>(classProb * numBins);
    unsigned int cl = 0;
    if (classProb >= 0.5) {
      cl = 1;
    }
    if (dataClasses[i] == cl) {
      // correct prediction
      if (cl == 0) {
        ++tn;
        ++falseCount[bin];
      }
      else {
        ++tp;
        ++trueCount[bin];
      }
    }
    else {
      // wrong prediction: optionally print a random sample of these cases
      if ((verboseLevel > 0) && ((rnd.getRand() % 10) == 0)) {
        cout << "Sample bad case: " << classProb << " "
             << cl << " " << dataClasses[i] << " "
             << data[i] << endl;
      }
      if (cl == 0) {
        ++fn;
        ++trueCount[bin];
      }
      else {
        ++fp;
        ++falseCount[bin];
      }
    }
  }
  cout << "estimateAccuracy tp tn fp fn mathews: " << tp << " " << tn << " "
       << fp << " " << fn << " " << computeMathews(tp, fp, tn, fn) << endl;
  cout << "Histogram: ";
  // NOTE(review): the counts already contain a pseudo count of one each, so
  // the additional "+ 2" looks like a second pseudo count - confirm whether
  // that is intended. Left unchanged to keep the printed values stable.
  for (unsigned int i = 0; i < numBins; ++i) {
    cout << static_cast<double>(trueCount[i])
      / static_cast<double>(trueCount[i] + falseCount[i] + 2) << " ";
  }
  cout << endl;
  return - static_cast<double>(fn + fp);
}

/**
 * Placeholder for an optimization of the classifier.
 *
 * Currently disabled: the function only reports an error through the ERROR
 * macro and uses none of its arguments. The former implementation is kept
 * below as a comment. In each of numSteps cycles it optimized the scaling
 * of one randomly chosen node and kept the change only if
 * estimateAccuracy() did not get worse.
 *
 * @param knnNet        classifier that would be optimized (unused).
 * @param numSteps      number of optimization cycles (unused).
 * @param numNodeSteps  steps per node optimization (unused).
 * @param verboseLevel  verbosity for the node optimization (unused); note
 *                      that this parameter hides the member of the same name.
 */
void
KnnNetTrainer::optimize(ClassifierBase& knnNet, unsigned int numSteps,
                        unsigned int numNodeSteps,
                        int verboseLevel)
{
  // Nothing below uses the arguments while the implementation is disabled.
  (void) knnNet;
  (void) numSteps;
  (void) numNodeSteps;
  (void) verboseLevel;
  ERROR("Sorry, KnnNetTrainer::optimize currently not implemented!");
  /* Disabled implementation:
  double oldAcc = estimateAccuracy(knnNet);
  double newAcc = oldAcc;
  Random& rnd = Random::getInstance();
  unsigned int numTrials = 10; // dummy
  double stepWidth = 0.1;
  for (unsigned int i = 0; i < numSteps; ++i) {
    cout << "Optimization cycle step " << i + 1 << endl;
    // cout << "Current knnNet: " << endl << knnNet << endl;
    unsigned int knnId = rnd.getRand(knnNet.size());
    KnnNode& node = knnNet.getNode2(knnId);
    Vec<double> scaling = node.getScaling();
    unsigned int kk = node.getK();
    cout << "Optimization cycle step " << i + 1 << endl;
    cout << "Current knnNet: " << endl << knnNet << endl;
    cout << "Optimizing randomly chosen node number " << knnId + 1 << endl;
    node.optimizeScaling(numNodeSteps, verboseLevel, stepWidth, numTrials);
    newAcc = estimateAccuracy(knnNet);
    cout << "New and old accuracy: " << newAcc << " " << oldAcc << endl;
    if (newAcc < oldAcc) {
      cout << "Rejecting step!" << endl;
      node.setScaling(scaling);
      node.setK(kk);
    }
    else {
      cout << "Accepting step!" << endl;
      oldAcc = newAcc;
    }
  }
  cout << "Final accuracy: " << oldAcc << endl;
  cout << "Final knnNet: " << endl << knnNet << endl;
  */
}

/* MODIFIERS */

/* copy method */
void 
KnnNetTrainer::copy(const KnnNetTrainer& other)
{
  numClasses = other.numClasses;
  verboseLevel = other.verboseLevel;
  data = other.data;
  dataClasses = other.dataClasses;
}

/**
 * Reads labelled training rows from a stream and appends them to data and
 * dataClasses.
 *
 * Each line is split with getTokens(). The first non-empty line defines the
 * layout: its last column is the class label and all columns before it are
 * feature values. Later lines with fewer columns than that are skipped;
 * surplus columns are ignored. Features are converted with stod(), the
 * label with stoui().
 *
 * numClasses is reset to 1 on entry and raised to (highest label read in
 * this call) + 1, since labels count from zero. Rows already stored are
 * kept; new rows are appended.
 *
 * Blank lines, including a blank first line or an entirely empty stream,
 * are skipped. Deriving the layout from an empty line would wrap the
 * column count around in unsigned arithmetic.
 *
 * @param is  input stream, read until it goes into a failed state.
 */
void
KnnNetTrainer::readData(istream& is)
{
  this->numClasses = 1;
  unsigned int highestClass = 0; // highest defined class id so far
  unsigned int classCol = 0;     // index of the class column; also the
                                 // number of feature columns before it
  bool layoutKnown = false;      // true once the first non-empty line is seen
  Vec<double> dataVec;
  while (is) {
    string line = getLine(is);
    vector<string> tokens = getTokens(line);
    if (tokens.empty()) {
      continue; // blank line (or failed read at end of input)
    }
    if (!layoutKnown) {
      // the first non-empty line fixes the column layout for the whole file
      classCol = tokens.size() - 1;
      dataVec = Vec<double>(classCol, 0.0);
      layoutKnown = true;
    }
    else if (tokens.size() <= classCol) {
      continue; // too short to contain the class column
    }
    for (unsigned int i = 0; i < classCol; ++i) {
      dataVec[i] = stod(tokens[i]);
    }
    unsigned int dataClass = stoui(tokens[classCol]);
    if (dataClass > highestClass) {
      highestClass = dataClass;
      numClasses = highestClass + 1; // counting starts from zero
    }
    data.push_back(dataVec);
    dataClasses.push_back(dataClass);
  }
}


/**
 * Reads unlabelled query rows from a stream and appends them to data.
 *
 * Each line is split with getTokens(). The first non-empty line defines the
 * number of columns; later lines with fewer columns are skipped and surplus
 * columns are ignored. All values are converted with stod().
 *
 * Only data is extended. dataClasses is not touched, so after this call
 * data holds more rows than dataClasses holds labels.
 *
 * Blank lines, including a blank first line or an entirely empty stream,
 * are skipped. Taking the column count from an empty line would set it to
 * zero, after which every line would be stored as an empty row.
 *
 * @param is  input stream, read until it goes into a failed state.
 */
void
KnnNetTrainer::readQueryData(istream& is)
{
  unsigned int dim = 0; // number of columns; 0 until the first non-empty line
  Vec<double> dataVec;
  while (is) {
    string line = getLine(is);
    vector<string> tokens = getTokens(line);
    if (tokens.empty()) {
      continue; // blank line (or failed read at end of input)
    }
    if (dim == 0) {
      // the first non-empty line fixes the number of columns
      dim = tokens.size();
      dataVec = Vec<double>(dim, 0.0);
    }
    else if (tokens.size() < dim) {
      continue; // incomplete row
    }
    for (unsigned int i = 0; i < dim; ++i) {
      dataVec[i] = stod(tokens[i]);
    }
    data.push_back(dataVec);
  }
}

