// --*- C++ -*------x---------------------------------------------------------
#ifndef __CORRELATION_TOOLS__
#define __CORRELATION_TOOLS__

#include <debug.h>
#include <Vec.h>
#include <HashCorrelationFinder.h>
#include <Stem.h>
#include <SingleLinkage2DProgressiveFilter.h>
#include <clusterAlgorithms.h>

/**
 * Helper routines for Correlation / Stem objects: clustering of correlations,
 * conversion between alignment-internal coordinates (column numbers) and
 * assembly coordinates, and filtering / stem-conversion utilities whose
 * definitions live outside this header.
 */
class CorrelationTools {

 public:

  typedef MAFAlignment::length_type length_type;
  typedef MAFAlignment::size_type size_type;
  typedef Vec<Correlation> result_container;
  
 public:

  /**
   * Single-linkage clustering of correlations using a full distance matrix.
   *
   * NOT IMPLEMENTED: the function reports this through ERROR(). The distance
   * matrix is never filled (the conversion call is commented out), so should
   * ERROR() ever return, singleLinkage() is invoked on an empty matrix.
   *
   * @param stems  correlations to cluster (currently unused)
   * @param cutoff single-linkage distance cutoff
   * @return clusters as lists of indices into `stems`
   */
  Vec<Vec<unsigned int> > clusterCorrelations(const Vec<Correlation>& stems, double cutoff) {
    // REMARK << "Clustering subset of " << stems.size() << " stems (single-linkage, cutoff: " << cutoff << " )" << endl;
    ERROR("CorrelationTools::clusterCorrelations is not yet implemented.");
    Vec<Vec<double> > distances; //  = StemTools::convertStemEndsToDistanceMatrix(stems);
    /*       if (verbose > 1) { */
    /* 	REMARK << "Distance matrix:" << endl; */
    /* 	cout << distances << endl; */
    /*       } */
    Vec<Vec<unsigned int> > clusters = singleLinkage(distances, cutoff);
    return clusters;
  }

  /**
   * Clusters correlations in three stages: a 1D single-linkage pre-clustering
   * on the start coordinate, a second 1D pre-clustering on the stop coordinate
   * within each start cluster, and finally clusterCorrelations() on each
   * remaining subset. Taken from a similar method in InteractionClusterAnalyzer.
   *
   * NOTE(review): the final stage delegates to clusterCorrelations(), which
   * currently only reports "not yet implemented" - confirm before relying on
   * the result of this method.
   *
   * @param correlations  correlations to cluster
   * @param cutoff        distance cutoff, used for both the 1D pre-clustering
   *                      and the final clustering
   * @param ignoreSingles if true, clusters with exactly one member are not stored
   * @return clusters as lists of indices into `correlations`
   */
  Vec<Vec<unsigned int> > clusterCorrelationsFast(const Vec<Correlation>& correlations, double cutoff, bool ignoreSingles) {
    REMARK << "Clustering correlations (single-linkage, cutoff: " << cutoff << " )" << endl;
    ASSERT(singleLinkage1dTest());
    // Because of the distance measure between correlations one could consider
    // a more generous 1D cutoff (e.g. cutoff + 100) to keep the pre-clustering
    // on the "safe side"; currently the plain cutoff is used.
    const double cutoff1d = cutoff;

    // Stage 1: fast pre-clustering using only the first coordinate.
    Vec<double> starts(correlations.size());
    for (Vec<Correlation>::size_type i = 0; i < correlations.size(); ++i) {
      starts[i] = static_cast<double>(correlations[i].getStart());
    }
    Vec<Vec<unsigned int> > startClusters = singleLinkage1d(starts, cutoff1d);

    Vec<Vec<unsigned int> > finalClusters;
    for (Vec<Vec<unsigned int> >::size_type i = 0; i < startClusters.size(); ++i) { // should be much smaller size
      // Stage 2: fast pre-clustering using only the second coordinate.
      Vec<Correlation> subsetCorrelations = getSubset(correlations, startClusters[i]);
      Vec<double> stops(subsetCorrelations.size());
      for (Vec<Correlation>::size_type j = 0; j < subsetCorrelations.size(); ++j) {
        stops[j] = static_cast<double>(subsetCorrelations[j].getStop());
      }
      Vec<Vec<unsigned int> > stopClusters = singleLinkage1d(stops, cutoff1d);

      for (Vec<Vec<unsigned int> >::size_type m = 0; m < stopClusters.size(); ++m) { // should be much smaller size
        // Stage 3: full clustering on the (hopefully small) remaining subset.
        Vec<Correlation> subsubsetCorrelations = getSubset(subsetCorrelations, stopClusters[m]);
        Vec<Vec<unsigned int> > subClusters = clusterCorrelations(subsubsetCorrelations, cutoff);
        for (Vec<Vec<unsigned int> >::size_type j = 0; j < subClusters.size(); ++j) {
          Vec<unsigned int> newCluster;
          for (Vec<Vec<unsigned int> >::size_type k = 0; k < subClusters[j].size(); ++k) {
            // Map the index through both pre-clustering levels back to an
            // index into the original `correlations` container.
            unsigned int id = startClusters[i][stopClusters[m][subClusters[j][k]]];
            ERROR_IF(id >= correlations.size(), "Internal error in line 169 while clustering correlations!");
            newCluster.push_back(id); // make sure correct ids are used!
          }
          ERROR_IF(newCluster.size() == 0, "Internal error in line 172 while clustering correlations!");
          ERROR_IF(newCluster[0] >= correlations.size(), "Internal error in line 173 while clustering correlations!");
          if ((!ignoreSingles) || (newCluster.size() > 1) ) {
            finalClusters.push_back(newCluster);
          }
        }
      }
    }
    return finalClusters;
  }

  /**
   * Converts one correlation in place from internal coordinates (the column
   * number) to assembly coordinates of the given assembly. If the converted
   * correlation does not validate, start and stop are both set to -1.
   */
  static
  void convertInternalToAssemblyCoordinates(Correlation& corr, const string& assembly, const MAFAlignment& maf) {
    corr.setStart(maf.getAssemblyPosition(corr.getStart(), assembly));
    corr.setStop(maf.getAssemblyPosition(corr.getStop(), assembly));
    if (!corr.validate()) {
      // Mark as invalid instead of asserting.
      corr.setStart(-1);
      corr.setStop(-1);
    }
    // ASSERT(corr.validate());
  }
  
  /**
   * Converts every correlation in [itBegin, itEnd) in place from internal
   * coordinates (the column number) to assembly coordinates.
   */
  template <typename _Iterator>
  static
  void convertInternalToAssemblyCoordinates(_Iterator itBegin, _Iterator itEnd, const string& assembly, const MAFAlignment& maf) {
    for (_Iterator it = itBegin; it != itEnd; ++it) {
      convertInternalToAssemblyCoordinates(*it, assembly, maf);
    }
  }

  /**
   * Converts one correlation in place from assembly coordinates to internal
   * coordinates (the column number). If the converted correlation does not
   * validate, start and stop are both set to -1.
   */
  static
  void convertAssemblyToInternalCoordinates(Correlation& corr, const MAFAlignment& maf) {
    corr.setStart(maf.convertAssemblyPositionToColumnId(corr.getStart()));
    corr.setStop(maf.convertAssemblyPositionToColumnId(corr.getStop()));
    if (!corr.validate()) {
      // Mark as invalid instead of asserting.
      corr.setStart(-1);
      corr.setStop(-1);
    }
    // ASSERT(corr.validate());
  }

  /**
   * Misspelled legacy name of convertAssemblyToInternalCoordinates(Correlation&, const MAFAlignment&).
   * Kept so that existing callers continue to compile; forwards to the
   * correctly spelled overload.
   */
  static
  void convertAssemblyToInvernalCoordinates(Correlation& corr, const MAFAlignment& maf) {
    convertAssemblyToInternalCoordinates(corr, maf);
  }
  
  /**
   * Converts every correlation in [itBegin, itEnd) in place from assembly
   * coordinates to internal coordinates (the column number).
   */
  template <typename _Iterator>
  static
  void convertAssemblyToInternalCoordinates(_Iterator itBegin, _Iterator itEnd, const MAFAlignment& maf) {
    for (_Iterator it = itBegin; it != itEnd; ++it) {
      convertAssemblyToInternalCoordinates(*it, maf);
    }
  }

  /** Returns new stem in internal coordinates, given a stem in assembly coordinates. The stem length is carried over unchanged. */
  static Stem convertAssemblyToInternalCoordinates(const Stem& stem, MAFAlignment& maf) {
    return Stem(maf.convertAssemblyPositionToColumnId(stem.getStart()),
                maf.convertAssemblyPositionToColumnId(stem.getStop()),
                stem.getLength());
  }

  /** Keeps stems of minimum length 2 */
  static result_container filterIsolatedCorrelations2(const result_container& correlations, length_type distMin);

  /** Keeps stems of minimum length 3 */
  static result_container filterIsolatedCorrelations3(const result_container& correlations, length_type distMin);

  /** Converts from correlations to vector of stems */
  static Vec<Stem> convertCorrelationsToStems(const result_container& correlations, length_type distMin, Stem::index_type stemLengthMin);
  
  /** Converts from correlations to vector of FORWARD stems: these mean, that if i,j bind, then i+k,j+k (0 <= k < len) bind too */
  static Vec<Stem> convertCorrelationsToForwardStems(const result_container& correlations, length_type distMin, Stem::index_type stemLengthMin);
  
  /** Passes the given stems through the supplied progressive single-linkage filter (defined outside this header). */
  static Vec<Stem> singleLinkageFilter(const Vec<Stem>& stems, SingleLinkage2DProgressiveFilter& filter);
  
 private:

  /** Adds a single base pair. Potentially results in several new stems, because the filter might contain buffered result clusters */
  static Vec<Stem> singleLinkageFilter(length_type start, length_type stop, SingleLinkage2DProgressiveFilter& filter);

};


#endif
