#include <compass_help.h>
#include <vectornumerics.h>
#include <RankedSolution5.h>
#include <generalNumerics.h>
#include <stemhelp.h>
#include <Random.h>
#include <StringTools.h>

/** reads matrix in format that is also used by program "R" */
/*
Vec<Vec<double> >
readPlainMatrix(istream& is)
{
  Vec<Vec<double > > result;
  Vec<string> words;
  while (is) {
    string line = getLine(is);
    if ((line.size() == 0) || (line[0] == '#')) {
      continue;
    }
    words = getTokens(line);
    if (words.size() > 0) {
      Vec<double> row(words.size());
      for (unsigned int i = 0; i < words.size(); ++i) {
	row[i] = stod(words[i]);
      }
      result.push_back(row);
    }
  }
  return result;
}
*/

/** Raises a single score to the power alpha.
 * Non-positive scores are mapped to zero instead of being exponentiated.
 * @param x     score to rescale
 * @param alpha exponent; must be greater than zero (checked by PRECOND)
 * @return 0.0 if x <= 0, otherwise pow(x, alpha)
 */
double
rescaleWithLength(double x,
		  double alpha)
{
  PRECOND(alpha > 0.0);
  return (x <= 0.0) ? 0.0 : pow(x, alpha);
}

/** Rescales all matrix elements with one length-dependent exponent.
 * The exponent is 1 + lengthScaleWeight * (length / lengthScaleMean - 1).
 * If the exponent is not positive, a warning is printed and the matrix
 * is left unchanged.
 * @param compMatrix        matrix that is modified in place
 * @param lengthScaleWeight weight of the relative length deviation
 * @param lengthScaleMean   length for which the exponent equals one
 * @param length            actual length
 */
void
rescaleWithLength(Vec<Vec<double> >& compMatrix, 
		  double lengthScaleWeight, 
		  double lengthScaleMean,
		  unsigned int length) 
{
  const double relativeLength = length / lengthScaleMean;
  const double exponent = 1.0 + lengthScaleWeight * (relativeLength - 1.0);
  if (exponent <= 0.0) {
    cout << "Warning: exponent smaller or equal zero encountered in rescaling!"
	 << endl;
    return;
  }
  cout << "Rescaling result matrix with exponent: " << exponent << endl;
  for (unsigned int row = 0; row < compMatrix.size(); ++row) {
    Vec<double>& values = compMatrix[row];
    for (unsigned int col = 0; col < values.size(); ++col) {
      values[col] = rescaleWithLength(values[col], exponent);
    }
  }
}

/** Raises a single score to the power alpha.
 * Non-positive scores are mapped to zero instead of being exponentiated.
 * @param x     score to rescale
 * @param alpha exponent; must be greater than zero (checked by PRECOND)
 * @return 0.0 if x <= 0, otherwise pow(x, alpha)
 */
double
rescaleWithDistance(double x,
		    double alpha)

{
  PRECOND(alpha > 0.0);
  return (x <= 0.0) ? 0.0 : pow(x, alpha);
}

/** Rescales matrix elements with an exponent that grows with the
 * distance |i - j| from the diagonal.
 * Elements closer than 4 positions to the diagonal, and elements whose
 * distance is below distanceScaleMean, are left untouched. For all other
 * elements the exponent is
 * 1 + distanceScaleWeight * (|i - j| - distanceScaleMean).
 * @param compMatrix          matrix that is modified in place
 * @param distanceScaleWeight increase of the exponent per unit of distance
 * @param distanceScaleMean   distance at which the exponent equals one
 */
void
rescaleWithDistance(Vec<Vec<double> >& compMatrix, 
		    double distanceScaleWeight, 
		    double distanceScaleMean) 
{
  for (unsigned int row = 0; row < compMatrix.size(); ++row) {
    Vec<double>& values = compMatrix[row];
    for (unsigned int col = 0; col < values.size(); ++col) {
      const int separation = abs(static_cast<int>(row) - static_cast<int>(col));
      if (separation >= 4) {
	const double excess = fabs(static_cast<double>(row) - static_cast<double>(col))
	  - distanceScaleMean;
	if (!(excess < 0)) {
	  const double exponent = 1.0 + (distanceScaleWeight * excess);
	  values[col] = rescaleWithDistance(values[col], exponent);
	}
      }
    }
  }
}


/** Counts the characters of s that are neither '.' nor '-'.
 * @param s string to inspect (e.g. an alignment column)
 * @return number of non-gap characters
 */
int
countNonGapChars(const string& s)
{
  int numChars = 0;
  for (string::const_iterator it = s.begin(); it != s.end(); ++it) {
    const bool isGap = (*it == '.') || (*it == '-');
    if (!isGap) {
      ++numChars;
    }
  }
  return numChars;
}

/** Filters out bad rows and columns.
 * Repeatedly picks the not yet filtered row with the largest number of
 * elements above threshold. While that number exceeds maxAbove, the row
 * and the corresponding column are set to zero and the row index is
 * recorded.
 * @param compMatrix matrix that is modified in place
 * @param threshold  elements strictly greater than this value are counted
 * @param maxAbove   maximum tolerated number of counted elements per row
 * @return indices of the filtered rows/columns, each index exactly once,
 *         in the order in which they were filtered
 */
Vec<unsigned int>
filterBadColumns(Vec<Vec<double> >& compMatrix, double threshold, 
		 unsigned int maxAbove)
{
  Vec<unsigned int> filtered;
  unsigned int bestCountAbove = 0;
  unsigned int bestId = 0;
  do {
    bestCountAbove = 0;
    for (unsigned int i = 0; i < compMatrix.size(); ++i) {
      if (findFirstIndex(filtered, i) < filtered.size()) {
	continue; // this site has already been visited
      }
      unsigned int countAbove = 0;
      for (unsigned int j = 0; j < compMatrix[i].size(); ++j) {
	if (compMatrix[i][j] > threshold) {
	  ++countAbove;
	}
      }
      if (countAbove > bestCountAbove) {
	bestCountAbove = countAbove;
	bestId = i;
      }
    }
    if (bestCountAbove > maxAbove) {
      // record the index once (not once per column as before)
      filtered.push_back(bestId);
      for (unsigned int j = 0; j < compMatrix[bestId].size(); ++j) {
	compMatrix[bestId][j] = 0.0;
	// guard the mirrored access in case the matrix is not square
	if ((j < compMatrix.size()) && (bestId < compMatrix[j].size())) {
	  compMatrix[j][bestId] = 0.0;
	}
      }
    }
  }
  while (bestCountAbove > maxAbove);
  return filtered;
}

/** Sets all matrix elements to zero that correspond to alignment columns
 * with less than numCharMinFilter non-gap characters.
 * Both row i and column i of the matrix are cleared for every such
 * alignment column i.
 * @param matrix           matrix that is modified in place
 * @param ali              alignment whose columns are inspected
 * @param numCharMinFilter minimum number of non-gap characters a column
 *                         needs in order to be kept
 */
void
charMinFilter(Vec<Vec<double> >& matrix, const SequenceAlignment& ali, int numCharMinFilter)
{
  for (unsigned int colId = 0; colId < ali.getLength(); ++colId) {
    string column = ali.getColumn(colId);
    if (countNonGapChars(column) >= numCharMinFilter) {
      continue; // enough characters: keep this column
    }
    for (unsigned int other = 0; other < matrix.size(); ++other) {
      matrix[colId][other] = 0.0;
      matrix[other][colId] = 0.0;
    }
  }
}

/** Scales matrix elements by the fraction of gap-free sequence pairs.
 * For each pair of alignment columns (i, j) with i < j, the fraction of
 * sequences that have no GAP_CHAR in either column is raised to the
 * power alpha; matrix[i][j] and matrix[j][i] are multiplied by the result.
 * Nothing is done for an alignment without sequences.
 * @param matrix matrix that is modified in place
 * @param ali    alignment whose columns are inspected
 * @param alpha  exponent applied to the gap-free fraction
 */
void
gapFracRescale(Vec<Vec<double> >& matrix, const SequenceAlignment& ali, double alpha)
{
  if (ali.size() == 0) {
    return;
  }
  const char gap = GAP_CHAR;
  for (unsigned int i = 0; i < ali.getLength(); ++i) {
    string colA = ali.getColumn(i);
    for (unsigned int j = i + 1; j < ali.getLength(); ++j) {
      string colB = ali.getColumn(j);
      unsigned int numGapRows = 0;
      for (unsigned int row = 0; row < colA.size(); ++row) {
	const bool bothPresent = (colA[row] != gap) && (colB[row] != gap);
	if (!bothPresent) {
	  ++numGapRows;
	}
      }
      double charFrac = (ali.size()-numGapRows) / static_cast<double>(ali.size());
      const double scale = pow(charFrac, alpha);
      matrix[i][j] *= scale;
      matrix[j][i] *= scale;
    }
  }
}

/** Scales matrix elements depending on the number of gap-free pairs.
 * For each pair of alignment columns (i, j) with i < j, n is the number of
 * sequences that have no GAP_CHAR in either column. matrix[i][j] is
 * multiplied by 1 + 2 * exp(-alpha * n) and the result is copied to
 * matrix[j][i]. Nothing is done for an alignment without sequences.
 * @param matrix matrix that is modified in place
 * @param ali    alignment whose columns are inspected
 * @param alpha  decay constant of the correction factor
 */
void
entropyRescale(Vec<Vec<double> >& matrix, const SequenceAlignment& ali, double alpha)
{
  if (ali.size() == 0) {
    return;
  }
  const char gap = GAP_CHAR;
  for (unsigned int i = 0; i < ali.getLength(); ++i) {
    string colA = ali.getColumn(i);
    for (unsigned int j = i + 1; j < ali.getLength(); ++j) {
      string colB = ali.getColumn(j);
      unsigned int numGapRows = 0;
      for (unsigned int row = 0; row < colA.size(); ++row) {
	const bool bothPresent = (colA[row] != gap) && (colB[row] != gap);
	if (!bothPresent) {
	  ++numGapRows;
	}
      }
      // number of sequences without a gap in either column
      unsigned int numPairs = ali.size() - numGapRows;
      double factor = (2*exp(-alpha * numPairs)) + 1.0;
      matrix[i][j] *= factor;
      matrix[j][i] = matrix[i][j];
    }
  }
}


/** Converts a single value to a p-value-like score using the error function.
 * Returns errorFunction((val - pStatAvg) / (pStatDev * sqrt(2))).
 * Values that do not exceed the average, or a non-positive deviation,
 * yield 0. (An earlier variant, 0.5 * (1 + errorFunction(...)), is no
 * longer used.)
 * @param val      value to convert
 * @param pStatAvg average of the reference distribution
 * @param pStatDev standard deviation of the reference distribution
 * @see http://mathworld.wolfram.com/NormalDistribution.html */
double
convertToPValues(double val,
		 double pStatAvg, double pStatDev)
{
  const double offset = val - pStatAvg;
  if ((pStatDev <= 0.0) || (offset <= 0.0)) {
    return 0.0;
  }
  const double sqrtTwo = sqrt(2.0);
  return errorFunction(offset/(pStatDev*sqrtTwo));
}

/** Converts every element of the matrix with the scalar convertToPValues.
 * @param compMatrix matrix that is modified in place
 * @param pStatAvg   average of the reference distribution
 * @param pStatDev   standard deviation of the reference distribution
 */
void
convertToPValues(Vec<Vec<double> >& compMatrix, 
		 double pStatAvg, double pStatDev)
{
  for (unsigned int row = 0; row < compMatrix.size(); ++row) {
    Vec<double>& values = compMatrix[row];
    for (unsigned int col = 0; col < values.size(); ++col) {
      values[col] = convertToPValues(values[col], pStatAvg, pStatDev);
    }
  }
}

 void
 computeInformationZScore(const SequenceAlignment& ali, 
			  const CompensationScorer& scorer2, 
			  double& informationMean, 
			  double& informationDev,
			  const Vec<double>& wVec)
 {
   Vec<double> infoVec(ali.getLength(), 0.0);
   string slice;
   for (unsigned int i = 0; i < ali.getLength(); ++i) {
     slice = ali.getColumn(i);
     infoVec[i] = scorer2.singleEntropy3(slice, wVec);
     // cout << infoVec[i] << " : Information of " << slice << " " << wVec << endl;
   }
   informationMean = vecMean(infoVec);
   informationDev = sqrt(vecVariance(infoVec));
   if (informationDev < INF_SINGLE_DEV_MIN) {
     cout << "Warning: Zero deviation of single column information: " 
	  << informationMean << " " << informationDev << endl
	  << infoVec << endl;
     informationDev = INF_SINGLE_DEV_MIN;
   }
 }

 /** returns 1 for highly reliable prediction,
     0 for totally uncompatible prediction */
 double
 getCompensationScore(const string& col1Orig,
		      const string& col2Orig,
		      const Vec<double>& aliSequenceWeights,
		      const Vec<unsigned int>& subset,
		      const CompensationScorer& scorer)
 {
   string col1 = getSubset(col1Orig, subset);
   string col2 = getSubset(col2Orig, subset);
   ASSERT(col1.size() == subset.size());
   return scorer.compensationScore(col1, col2, aliSequenceWeights);
 }

 /** Returns the similarity of two index sets: the size of their common
  * subset divided by the mean of the two set sizes.
  * Two empty sets are treated as identical (similarity 1.0); previously
  * this case evaluated 0/0 and returned NaN.
  * @param v1 first set
  * @param v2 second set
  * @return similarity; 1.0 if both sets are empty
  */
 double
 setSimilarity(const Vec<unsigned int>& v1,
	       const Vec<unsigned int>& v2)
 {
   if ((v1.size() + v2.size()) == 0) {
     return 1.0; // avoid 0/0
   }
   return commonSubset(v1, v2).size() / (0.5 * static_cast<double>(v1.size()+v2.size()));
 }

void
findOptimalSubset(const SequenceAlignment& ali,
		  const Vec<unsigned int>& positions,
		  const Vec<double>& aliSequenceWeights,
		  const CompensationScorer& scorer,
		  unsigned int numTrials)
{
   cout << "Starting findOptimalSubset with "
	<< positions << " " << numTrials << endl;
   unsigned int nn = ali.size();
   numTrials = 1000;
   double similarityMax = 0.75;
   Vec<unsigned int> result = generateStair(nn);
   Vec<unsigned int> order = generateStair(nn);
   Random& rnd = Random::getInstance();
   Vec<Vec<unsigned int> > sofarSets;
   Vec<double> sofarScores;
   unsigned int startList = 3;
   Vec<unsigned int> currentOrder;
   double score, newScore;
   for (unsigned int i = 0; i < numTrials; ++i) {
     random_shuffle(order.begin(), order.end(), rnd);
     currentOrder.clear();
     currentOrder = Vec<unsigned int>(startList);
     for (unsigned int j = 0; j < startList; ++j) {
       currentOrder[j] = order[j];
     }
     score = 0.0;
     for (unsigned int ii = 0; ii + 1 < positions.size(); ii += 2) {
       string s1 = ali.getColumn(positions[ii]);
       string s2 = ali.getColumn(positions[ii+1]);
       score += getCompensationScore(s1, s2, aliSequenceWeights, 
				     currentOrder, scorer);
     }
     for (unsigned int j = startList; j < order.size(); ++j) {
       currentOrder.push_back(order[j]);
       newScore = 0.0;
       for (unsigned int ii = 0; ii + 1 < positions.size(); ii += 2) {
	 string s1 = ali.getColumn(positions[ii]);
	 string s2 = ali.getColumn(positions[ii+1]);	 
	 newScore += getCompensationScore(s1, s2, aliSequenceWeights, 
				    currentOrder, scorer);
       }
       if (newScore >= score) {
	 // accepting step!
	 score = newScore;
       }
       else {
	 currentOrder.erase(currentOrder.begin()+currentOrder.size()-1);
       }
     }
     sort(currentOrder.begin(), currentOrder.end());
     bool found = false;
     // check for set similars that are already stored:
     for (unsigned int k = 0; k < sofarSets.size(); ++k) {
       if (setSimilarity(sofarSets[k], currentOrder) > similarityMax) {
	 found = true;
	 break;
       }
     }
     if (!found) {
       sofarSets.push_back(currentOrder);
       sofarScores.push_back(score);
     }
   }
   
   cout << "The following sets and scores where found: " 
	<< endl;

   for (unsigned int i = 0; i < sofarSets.size(); ++i) {
     cout << sofarScores[i] << " " << externalCounting(sofarSets[i]) << endl;
   }

 }

 /** Returns the alignment column of the n'th character of the sequence
  * without gaps: element n of the result is the index in s of the n'th
  * character that differs from gapChar. Also prints s and the number of
  * such characters to cout.
  * @param s       aligned sequence (may contain gap characters)
  * @param gapChar character that denotes a gap
  */
 Vec<unsigned int>
 getAlignmentIndices(const string& s, char gapChar)
 {
   Vec<unsigned int> columns;
   unsigned int pos = 0;
   while (pos < s.size()) {
     if (s[pos] != gapChar) {
       columns.push_back(pos);
     }
     ++pos;
   }
   cout << "Getting alignment indices: " << s << " " << columns.size() << endl;
   return columns;
 }


 /** Generates consensus probability matrix from optional pairing
  * probability data of individual sequences.
  *
  * Every entry of probVectors[i] (fields first = value, second/third =
  * residue indices in the gapless sequence) is mapped to alignment columns
  * of the sequence named probNames[i]. The value, multiplied by the weight
  * of that sequence, is accumulated symmetrically; finally each element is
  * divided by the number of contributions it received.
  *
  * @param probVectors     probability entries, one vector per sequence
  * @param probNames       names of the sequences the vectors belong to;
  *                        same size as probVectors
  * @param ali             alignment containing the named sequences
  * @param gapChar         gap character of the alignment
  * @param sequenceWeights weights, indexed like the alignment sequences
  * @return symmetric matrix of size ali.getLength() x ali.getLength()
  */
 Vec<Vec<double> >
 generateProbabilityMatrix(const Vec<Vec<RankedSolution5<unsigned int, unsigned int> > > probVectors,
			   const Vec<string>& probNames,
			   const SequenceAlignment& ali,
			   char gapChar,
			   const Vec<double>& sequenceWeights)
 {
   PRECOND(probVectors.size() == probNames.size());
   Vec<Vec<double> > result(ali.getLength(), Vec<double>(ali.getLength(), 0.0));
   Vec<Vec<unsigned int> > resultCounter(ali.getLength(), Vec<unsigned int>(ali.getLength(), 0U));
   cout << "Number of probability vectors: " << probVectors.size() << endl;
   cout << "Probability names: " << probNames << endl;
   cout << "Alignment size and length: " << ali.size() << " " << ali.getLength() << endl;
   cout << "Sequence weights: " << sequenceWeights.size() << endl;
   for (unsigned int i = 0; i < probVectors.size(); ++i) {
     unsigned int tid = ali.findSequenceByName(probNames[i]);
     // cout << "Tid" << tid << endl;
     if (tid >= ali.size()) {
       cout << "Trying to find: " << probNames[i] << endl;
       ERROR("Name of probability data not found in alignment!");
     }
     // read the weight only after tid has been validated
     ERROR_IF(tid >= sequenceWeights.size(),
	      "No sequence weight available for sequence of probability data!");
     double sWeight = sequenceWeights[tid];
     // compute alignment column indices of n'th residue in gapless sequence
     Vec<unsigned int> newIds = getAlignmentIndices(ali.getSequence(tid), gapChar);
     cout << ali.getName(tid) << " " << ali.getSequence(tid) << endl;
     cout << "New alignment indices: " << newIds << endl;
     for (unsigned int j = 0; j < probVectors[i].size(); ++j) {
       // convert to alignment index:
       ERROR_IF(probVectors[i][j].second >= newIds.size(), 
		"Probability vector x index larger than sequence size!");
       unsigned int nx = newIds[probVectors[i][j].second];
       cout << "testing prob vector: " << i << " " << j << " " 
	    << probVectors[i][j].first << " " << probVectors[i][j].second << " " << probVectors[i][j].third << " "
	    << newIds.size() << endl;
       ERROR_IF(probVectors[i][j].third >= newIds.size(), 
		"Probability vector y index larger than sequence size!");
       unsigned int ny = newIds[probVectors[i][j].third];
       ERROR_IF(nx >= result.size(), "Internal error in line 1555!");
       ERROR_IF(ny >= result[nx].size(), "Internal error in line 1556!");
       result[nx][ny] += sWeight * probVectors[i][j].first; // multiply with weight of sequence
       resultCounter[nx][ny] += 1;
       // keep both matrices symmetric
       result[ny][nx] = result[nx][ny];
       resultCounter[ny][nx] = resultCounter[nx][ny];
     }
   }
   // compute averages
   for (unsigned int i = 0; i < result.size(); ++i) {
     for (unsigned int j = 0; j < result[i].size(); ++j) {
       if (resultCounter[i][j] > 0) {
	 result[i][j] /= resultCounter[i][j];
       }
     }
   }
   return result;
 }

 /** Generates consensus probability matrix:
  * starts from the averaged matrix of generateProbabilityMatrix and sets
  * every off-diagonal pair of columns to zero that is not contained in
  * the probability data of every single sequence (consensus instead of
  * average).
  *
  * Implementation: each probability vector is scanned once and the number
  * of vectors supporting each column pair is counted. This replaces a
  * search through all vectors (including a repeated name lookup and
  * alignment index computation) for every pair of columns; the resulting
  * matrix is the same.
  *
  * @param probVectors     probability entries, one vector per sequence
  * @param probNames       names of the sequences the vectors belong to
  * @param ali             alignment containing the named sequences
  * @param gapChar         gap character of the alignment
  * @param sequenceWeights weights, indexed like the alignment sequences
  */
 Vec<Vec<double> >
 generateProbabilityMatrix2(const Vec<Vec<RankedSolution5<unsigned int, unsigned int> > > probVectors,
			    const Vec<string>& probNames,
			    const SequenceAlignment& ali,
			    char gapChar,
			    const Vec<double>& sequenceWeights)
 {
   PRECOND(probVectors.size() == probNames.size());
   Vec<Vec<double> > result = generateProbabilityMatrix(probVectors, probNames, ali, gapChar, sequenceWeights);
   const unsigned int dim = result.size();
   // supportCount[lo][hi] (lo <= hi): number of vectors containing the pair
   Vec<Vec<unsigned int> > supportCount(dim, Vec<unsigned int>(dim, 0U));
   // lastSeen[lo][hi]: 1 + index of the last vector that contained the pair;
   // prevents counting a pair twice if one vector lists it more than once
   Vec<Vec<unsigned int> > lastSeen(dim, Vec<unsigned int>(dim, 0U));
   for (unsigned int i = 0; i < probVectors.size(); ++i) {
     unsigned int tid = ali.findSequenceByName(probNames[i]);
     if (tid >= ali.size()) {
       cout << "Trying to find: " << probNames[i] << endl;
       ERROR("Name of probability data not found in alignment!");
     }
     // compute alignment column indices of n'th residue in gapless sequence
     Vec<unsigned int> newIds = getAlignmentIndices(ali.getSequence(tid), gapChar);
     for (unsigned int j = 0; j < probVectors[i].size(); ++j) {
       // convert to alignment index:
       ERROR_IF(probVectors[i][j].second >= newIds.size(), 
		"Probability vector x index larger than sequence size!");
       unsigned int nx = newIds[probVectors[i][j].second];
       ERROR_IF(probVectors[i][j].third >= newIds.size(), 
		"Probability vector y index larger than sequence size!");
       unsigned int ny = newIds[probVectors[i][j].third];
       ERROR_IF(nx >= result.size(), "Internal error in line 1555!");
       ERROR_IF(ny >= result[nx].size(), "Internal error in line 1556!");
       unsigned int lo = (nx < ny) ? nx : ny;
       unsigned int hi = (nx < ny) ? ny : nx;
       if (lastSeen[lo][hi] != i + 1) {
	 lastSeen[lo][hi] = i + 1;
	 ++supportCount[lo][hi];
       }
     }
   }
   for (unsigned int sx = 0; sx < dim; ++sx) {
     for (unsigned int sy = sx+1; sy < dim; ++sy) {
       if (supportCount[sx][sy] < probVectors.size()) {
	 // reset value, because it was not found in every single structure
	 result[sx][sy] = 0.0;
	 result[sy][sx] = 0.0;
       }
     }
   }
   return result;
 }

 /** Generates consensus probability matrix:
  * starts from the averaged matrix of generateProbabilityMatrix and
  * multiplies each element with a logistic weight that depends on the
  * fraction of probability vectors containing that pair of columns:
  * weight = logistic((fraction - 2/3) * 5).
  * With fewer than two probability vectors the averaged matrix is
  * returned unchanged.
  *
  * Each symmetric pair is now weighted exactly once. Previously the loop
  * visited both (i,j) and (j,i) and mirrored the value each time, so
  * off-diagonal elements ended up multiplied by the weight twice.
  *
  * @param probVectors     probability entries, one vector per sequence
  * @param probNames       names of the sequences the vectors belong to
  * @param ali             alignment containing the named sequences
  * @param gapChar         gap character of the alignment
  * @param sequenceWeights weights, indexed like the alignment sequences
  */
 Vec<Vec<double> >
 generateProbabilityMatrix3(const Vec<Vec<RankedSolution5<unsigned int, unsigned int> > > probVectors,
			    const Vec<string>& probNames,
			    const SequenceAlignment& ali,
			    char gapChar,
			    const Vec<double>& sequenceWeights)
 {
   PRECOND(probVectors.size() == probNames.size());
   Vec<Vec<double> > result = generateProbabilityMatrix(probVectors, probNames, ali, gapChar, sequenceWeights);
   if (probVectors.size() < 2) {
     return result;
   }
   Vec<Vec<unsigned int> > counterMatrix(result.size(), Vec<unsigned int>(result.size(), 0U));
   double threshFrac = 2.0/3.0; // fraction at which the weight equals logistic(0)
   double threshScale = 5;      // steepness of the logistic weighting
   for (unsigned int i = 0; i < probVectors.size(); ++i) {
     unsigned int tid = ali.findSequenceByName(probNames[i]);
     if (tid >= ali.size()) {
       cout << "Trying to find: " << probNames[i] << endl;
       ERROR("Name of probability data not found in alignment!");
     }
     // compute alignment column indices of n'th residue in gapless sequence
     Vec<unsigned int> newIds = getAlignmentIndices(ali.getSequence(tid), gapChar);
     for (unsigned int j = 0; j < probVectors[i].size(); ++j) {
       // convert to alignment index:
       ERROR_IF(probVectors[i][j].second >= newIds.size(), 
		"Probability vector x index larger than sequence size!");
       unsigned int nx = newIds[probVectors[i][j].second];
       ERROR_IF(probVectors[i][j].third >= newIds.size(), 
		"Probability vector y index larger than sequence size!");
       unsigned int ny = newIds[probVectors[i][j].third];
       ERROR_IF(nx >= result.size(), "Internal error in line 1555!");
       ERROR_IF(ny >= result[nx].size(), "Internal error in line 1556!");
       counterMatrix[nx][ny] += 1;
       counterMatrix[ny][nx] = counterMatrix[nx][ny];
     }
   }
   for (unsigned int i = 0; i < result.size(); ++i) {
     // upper triangle (including diagonal) only; the lower triangle is
     // filled by mirroring so that the weight is applied a single time
     for (unsigned int j = i; j < result[i].size(); ++j) {
       double frac = static_cast<double>(counterMatrix[i][j]) / probVectors.size();
       ASSERT((frac >= 0.0) && (frac <= 1.0));
       double weight = logistic( (frac - threshFrac) * threshScale);
       result[i][j] *= weight; // down-weight pairs supported by few structures
       result[j][i] = result[i][j];
     }
   }
   return result;
 }


 /** Returns Mathews coefficient between reference stems and prediction matrix;
  * accuracy2 is defined according to Juan and Wilson.
  *
  * All elements (i, j) with borderLim <= j, j + diagLim < i and
  * i + borderLim < matrix size are classified: the element is a reference
  * pair if the matrix filled by addStemsToMatrix is positive there, and it
  * is predicted if compMatrix[i][j] >= scoreLimit.
  *
  * @param compMatrix     prediction matrix (must not be empty)
  * @param referenceStems reference stems
  * @param scoreLimit     minimum score of a predicted pair
  * @param accuracy       output: tp / (tp + fp), 0 if undefined
  * @param accuracy2      output: tp / (tp + fn + fp), 0 if undefined
  * @param coverage       output: tp / (tp + fn), 0 if undefined
  * @param wrongPosFrac   output: (fp + fn) divided by the matrix size
  * @param wrongPos       output: fp + fn
  * @param borderLim      number of positions ignored at both borders
  * @param diagLim        minimum distance from the diagonal
  * @return result of computeMathews(tp, fp, tn, fn)
  */
 double 
 computeMatrixMathews2(const Vec<Vec<double> >& compMatrix,
		      const Vec<Stem>& referenceStems,
		      double scoreLimit,
		      double& accuracy,
		      double& accuracy2,
		      double& coverage,
		      double& wrongPosFrac,
		      unsigned int& wrongPos,
		      unsigned int borderLim,
		      unsigned int diagLim)
 {
   PRECOND(compMatrix.size() > 0);
   Vec<Vec<double> > stemMatrix(compMatrix.size(), 
				Vec<double>(compMatrix.size(), 0.0));
   addStemsToMatrix(stemMatrix, referenceStems, 1.0);
   unsigned int tp = 0;
   unsigned int fp = 0;
   unsigned int tn = 0;
   unsigned int fn = 0;

   // "i + borderLim < size" instead of "i < size - borderLim": the
   // subtraction is done in unsigned arithmetic and would wrap around
   // (causing out-of-range access) if borderLim exceeds the matrix size
   for (unsigned int i = borderLim; i + borderLim < stemMatrix.size(); ++i) {
     for (unsigned int j = borderLim; j + diagLim < i; ++j) {
       if (stemMatrix[i][j] > 0.0) {
	 if (compMatrix[i][j] >= scoreLimit) {
	   ++tp;
	 }
	 else {
	   ++fn;
	 }
       }
       else {
	 if (compMatrix[i][j] >= scoreLimit) {
	   ++fp;
	 }
	 else {
	   ++tn;
	 }
       }
     }
   }
   if ((tp + fp) == 0) {
     accuracy = 0.0;
   }
   else {
     accuracy = static_cast<double>(tp) / (tp + fp);
   }
   if ((tp + fp + fn) == 0) {
     accuracy2 = 0.0;
   }
   else {
     accuracy2 = static_cast<double>(tp)/static_cast<double>(tp + fn + fp);
   }
   if ((tp + fn) == 0) {
     coverage = 0.0;
   }
   else {
     coverage = static_cast<double>(tp) / (tp + fn);
   }
   wrongPos = fp + fn; // number of wrong positions
   // divide by alignment length
   wrongPosFrac = static_cast<double>(wrongPos) 
     / static_cast<double>(compMatrix.size()); 
   return computeMathews(tp, fp, tn, fn);
 }


/** Counts true/false positives/negatives between reference stems and
 * prediction matrix and stores them in classResult (nothing is returned).
 *
 * All elements (i, j) with borderLim <= j, j + diagLim < i and
 * i + borderLim < matrix size are classified: the element is a reference
 * pair if the matrix filled by addStemsToMatrix is positive there, and it
 * is predicted if compMatrix[i][j] >= scoreLimit.
 *
 * @param compMatrix     prediction matrix (must not be empty)
 * @param referenceStems reference stems
 * @param scoreLimit     minimum score of a predicted pair
 * @param borderLim      number of positions ignored at both borders
 * @param diagLim        minimum distance from the diagonal
 * @param classResult    output: truePos, trueNeg, falsePos, falseNeg
 */
void
computeMatrixMathews2(const Vec<Vec<double> >& compMatrix,
		      const Vec<Stem>& referenceStems,
		      double scoreLimit,
		      unsigned int borderLim,
		      unsigned int diagLim,
		      ClassificationResult& classResult)
 {
   PRECOND(compMatrix.size() > 0);
   Vec<Vec<double> > stemMatrix(compMatrix.size(), 
				Vec<double>(compMatrix.size(), 0.0));
   addStemsToMatrix(stemMatrix, referenceStems, 1.0);
   unsigned int tp = 0;
   unsigned int fp = 0;
   unsigned int tn = 0;
   unsigned int fn = 0;

   // "i + borderLim < size" instead of "i < size - borderLim": the
   // subtraction is done in unsigned arithmetic and would wrap around
   // (causing out-of-range access) if borderLim exceeds the matrix size
   for (unsigned int i = borderLim; i + borderLim < stemMatrix.size(); ++i) {
     for (unsigned int j = borderLim; j + diagLim < i; ++j) {
       const bool predicted = (compMatrix[i][j] >= scoreLimit);
       if (stemMatrix[i][j] > 0.0) {
	 if (predicted) {
	   ++tp;
	 }
	 else {
	   ++fn;
	 }
       }
       else {
	 if (predicted) {
	   ++fp;
	 }
	 else {
	   ++tn;
	 }
       }
     }
   }
   classResult.truePos = tp;
   classResult.trueNeg = tn;
   classResult.falsePos = fp;
   classResult.falseNeg = fn;
 }


/** Counts true/false positives/negatives between reference stems and
 * prediction matrix and stores them in classResult (nothing is returned).
 * An element counts as predicted if its score lies in the closed interval
 * [scoreLimitLow, scoreLimitHigh].
 *
 * All elements (i, j) with borderLim <= j, j + diagLim < i and
 * i + borderLim < matrix size are classified; the element is a reference
 * pair if the matrix filled by addStemsToMatrix is positive there.
 *
 * @param compMatrix     prediction matrix (must not be empty)
 * @param referenceStems reference stems
 * @param scoreLimitLow  lower bound of the score interval
 * @param scoreLimitHigh upper bound of the score interval
 * @param borderLim      number of positions ignored at both borders
 * @param diagLim        minimum distance from the diagonal
 * @param classResult    output: truePos, trueNeg, falsePos, falseNeg
 */
void
computeMatrixMathews2Intervall(const Vec<Vec<double> >& compMatrix,
			       const Vec<Stem>& referenceStems,
			       double scoreLimitLow,
			       double scoreLimitHigh,
			       unsigned int borderLim,
			       unsigned int diagLim,
			       ClassificationResult& classResult)
 {
   PRECOND(compMatrix.size() > 0);
   Vec<Vec<double> > stemMatrix(compMatrix.size(), 
				Vec<double>(compMatrix.size(), 0.0));
   addStemsToMatrix(stemMatrix, referenceStems, 1.0);
   unsigned int tp = 0;
   unsigned int fp = 0;
   unsigned int tn = 0;
   unsigned int fn = 0;

   // "i + borderLim < size" instead of "i < size - borderLim": the
   // subtraction is done in unsigned arithmetic and would wrap around
   // (causing out-of-range access) if borderLim exceeds the matrix size
   for (unsigned int i = borderLim; i + borderLim < stemMatrix.size(); ++i) {
     for (unsigned int j = borderLim; j + diagLim < i; ++j) {
       const bool predicted = (compMatrix[i][j] >= scoreLimitLow) 
	 && (compMatrix[i][j] <= scoreLimitHigh);
       if (stemMatrix[i][j] > 0.0) {
	 if (predicted) {
	   ++tp;
	 }
	 else {
	   ++fn;
	 }
       }
       else {
	 if (predicted) {
	   ++fp;
	 }
	 else {
	   ++tn;
	 }
       }
     }
   }
   classResult.truePos = tp;
   classResult.trueNeg = tn;
   classResult.falsePos = fp;
   classResult.falseNeg = fn;
 }

/** Counts true/false positives/negatives between reference stems and
 * prediction matrix and stores them in classResult (nothing is returned).
 * An element counts as predicted if its score lies in the closed interval
 * [scoreLimitLow, scoreLimitHigh]. In addition, the score of every
 * inspected element is added to basePairHist (reference pairs) or
 * nobasePairHist (all other elements).
 *
 * All elements (i, j) with borderLim <= j, j + diagLim < i and
 * i + borderLim < matrix size are inspected; the element is a reference
 * pair if the matrix filled by addStemsToMatrix is positive there.
 *
 * @param compMatrix     prediction matrix (must not be empty)
 * @param referenceStems reference stems
 * @param scoreLimitLow  lower bound of the score interval
 * @param scoreLimitHigh upper bound of the score interval
 * @param borderLim      number of positions ignored at both borders
 * @param diagLim        minimum distance from the diagonal
 * @param basePairHist   receives scores of reference pairs
 * @param nobasePairHist receives scores of non-reference elements
 * @param classResult    output: truePos, trueNeg, falsePos, falseNeg
 */
void
computeMatrixMathews2IntervallB(const Vec<Vec<double> >& compMatrix,
				const Vec<Stem>& referenceStems,
				double scoreLimitLow,
				double scoreLimitHigh,
				unsigned int borderLim,
				unsigned int diagLim,
				Histogram& basePairHist,
				Histogram& nobasePairHist,
				ClassificationResult& classResult)
 {
   PRECOND(compMatrix.size() > 0);
   Vec<Vec<double> > stemMatrix(compMatrix.size(), 
				Vec<double>(compMatrix.size(), 0.0));
   addStemsToMatrix(stemMatrix, referenceStems, 1.0);
   unsigned int tp = 0;
   unsigned int fp = 0;
   unsigned int tn = 0;
   unsigned int fn = 0;

   // "i + borderLim < size" instead of "i < size - borderLim": the
   // subtraction is done in unsigned arithmetic and would wrap around
   // (causing out-of-range access) if borderLim exceeds the matrix size
   for (unsigned int i = borderLim; i + borderLim < stemMatrix.size(); ++i) {
     for (unsigned int j = borderLim; j + diagLim < i; ++j) {
       const bool predicted = (compMatrix[i][j] >= scoreLimitLow) 
	 && (compMatrix[i][j] <= scoreLimitHigh);
       if (stemMatrix[i][j] > 0.0) {
	 basePairHist.addData(compMatrix[i][j]);
	 if (predicted) {
	   ++tp;
	 }
	 else {
	   ++fn;
	 }
       }
       else {
	 nobasePairHist.addData(compMatrix[i][j]);
	 if (predicted) {
	   ++fp;
	 }
	 else {
	   ++tn;
	 }
       }
     }
   }
   classResult.truePos = tp;
   classResult.trueNeg = tn;
   classResult.falsePos = fp;
   classResult.falseNeg = fn;
 }


/** Returns the length of the anti-diagonal stretch through (xOrig, yOrig)
 * whose matrix values are all at least thresh.
 * Starting from the given element, the stretch is extended in direction
 * (x+1, y-1) and in direction (x-1, y+1) until an element is below thresh,
 * leaves the index range checked here, or is closer than diagLim to the
 * diagonal.
 * @param matrix  score matrix
 * @param xOrig   first index of the start element
 * @param yOrig   second index of the start element
 * @param thresh  minimum score of stretch elements
 * @param diagLim minimum value of |x - y| for stretch elements
 * @return 0 if the start element is below thresh, otherwise the number of
 *         elements of the stretch (including the start element)
 */
int
findStemStretchLen(const Vec<Vec<double> >& matrix,
		   int xOrig, int yOrig, double thresh, int diagLim)
{
  if (matrix[xOrig][yOrig] < thresh) {
    return 0;
  }
  const int dim = static_cast<int>(matrix.size());
  // number of accepted steps in direction (x+1, y-1)
  int stepsForward = 0;
  for (;;) {
    const int x = xOrig + (stepsForward + 1);
    const int y = yOrig - (stepsForward + 1);
    if ((abs(x-y) < diagLim) || (x >= dim) || (y < 0)) {
      break;
    }
    if (matrix[x][y] < thresh) {
      break;
    }
    ++stepsForward;
  }
  // number of accepted steps in direction (x-1, y+1)
  int stepsBackward = 0;
  for (;;) {
    const int x = xOrig - (stepsBackward + 1);
    const int y = yOrig + (stepsBackward + 1);
    if ((abs(x-y) < diagLim) || (y >= dim) || (x < 0)) {
      break;
    }
    if (matrix[x][y] < thresh) {
      break;
    }
    ++stepsBackward;
  }
  return 1 + stepsForward + stepsBackward;
}

int
findStemStretchSequenceLen(const string& sequence,
			   int xOrig, int yOrig, int diagLim)
{
  int x = xOrig;
  int y = yOrig;
  if (!isComplementary(sequence[x], sequence[y])) {
    return 0;
  }
  int len1 = 1;
  for (len1 = 1; ; ++len1) {
    x = xOrig + len1;
    y = yOrig - len1;
    if ((abs(x-y) < diagLim) || (x >= static_cast<int>(sequence.size())) || (y < 0)) {
      break;
    }
    if (!isComplementary(sequence[x], sequence[y])) {
      break;
    }
  }
  --len1;
  int len2 = 1;
  for (len2 = 1; ; ++len2) {
    x = xOrig - len2;
    y = yOrig + len2;
    if ((abs(x-y) < diagLim) || (y >= static_cast<int>(sequence.size())) || (x < 0)) {
      break;
    }
    if (!isComplementary(sequence[x], sequence[y])) {
      break;
    }
  }
  --len2;
  return 1 + len1 + len2;
}

/** Returns true if the stretch through (x, y), as measured by
 * findStemStretchLen with the given threshold, has exactly len positions. */
bool
isStemStretch(const Vec<Vec<double> >& matrix,
	      int x, int y, double thresh, int len, int diagLim)
{
  const int stretchLen = findStemStretchLen(matrix, x, y, thresh, diagLim);
  return stretchLen == len;
}

/** Returns true if the stretch of complementary characters around (x, y),
 * as measured by findStemStretchSequenceLen, has exactly len positions. */
bool
isStemStretchSequence(const string& sequence,
		      int x, int y, int len, int diagLim)
{
  const int stretchLen = findStemStretchSequenceLen(sequence, x, y, diagLim);
  return stretchLen == len;
}

/** Counts true/false positives/negatives between reference stems and
 * prediction matrix and stores them in classResult (nothing is returned).
 * An element counts as predicted if its score lies in the closed interval
 * [scoreLimitLow, scoreLimitHigh] and isStemStretch (with threshold
 * scoreLimitLow) reports a stretch of exactly stemLen positions.
 *
 * All elements (i, j) with borderLim <= j, j + diagLim < i and
 * i + borderLim < matrix size are classified; the element is a reference
 * pair if the matrix filled by addStemsToMatrix is positive there.
 *
 * @param compMatrix     prediction matrix (must not be empty)
 * @param referenceStems reference stems
 * @param scoreLimitLow  lower bound of the score interval
 * @param scoreLimitHigh upper bound of the score interval
 * @param stemLen        required stretch length
 * @param borderLim      number of positions ignored at both borders
 * @param diagLim        minimum distance from the diagonal
 * @param classResult    output: truePos, trueNeg, falsePos, falseNeg
 */
void
computeMatrixMathews2IntervallStem(const Vec<Vec<double> >& compMatrix,
				   const Vec<Stem>& referenceStems,
				   double scoreLimitLow,
				   double scoreLimitHigh,
				   int stemLen,
				   unsigned int borderLim,
				   unsigned int diagLim,
				   ClassificationResult& classResult)
{
  PRECOND(compMatrix.size() > 0);
  Vec<Vec<double> > stemMatrix(compMatrix.size(), 
			       Vec<double>(compMatrix.size(), 0.0));
  addStemsToMatrix(stemMatrix, referenceStems, 1.0);
  unsigned int tp = 0;
  unsigned int fp = 0;
  unsigned int tn = 0;
  unsigned int fn = 0;
  
  // "i + borderLim < size" instead of "i < size - borderLim": the
  // subtraction is done in unsigned arithmetic and would wrap around
  // (causing out-of-range access) if borderLim exceeds the matrix size
  for (unsigned int i = borderLim; i + borderLim < stemMatrix.size(); ++i) {
    for (unsigned int j = borderLim; j + diagLim < i; ++j) {
      // the stretch test is only evaluated for scores inside the interval
      const bool predicted = (compMatrix[i][j] >= scoreLimitLow) 
	&& (compMatrix[i][j] <= scoreLimitHigh)
	&& (isStemStretch(compMatrix, static_cast<int>(i), static_cast<int>(j),
			  scoreLimitLow, stemLen, static_cast<int>(diagLim)));
      if (stemMatrix[i][j] > 0.0) {
	if (predicted) {
	  ++tp;
	}
	else {
	  ++fn;
	}
      }
      else {
	if (predicted) {
	  ++fp;
	}
	else {
	  ++tn;
	}
      }
    }
  }
  classResult.truePos = tp;
  classResult.trueNeg = tn;
  classResult.falsePos = fp;
  classResult.falseNeg = fn;
}

/** Counts true/false positives/negatives between reference stems and
 * prediction matrix and stores them in classResult (nothing is returned).
 * Defines stem by complementary base pairs of supplied sequence:
 * an element counts as predicted if its score lies in the closed interval
 * [scoreLimitLow, scoreLimitHigh] and isStemStretchSequence reports a
 * stretch of exactly stemLen positions.
 *
 * All elements (i, j) with borderLim <= j, j + diagLim < i and
 * i + borderLim < matrix size are classified; the element is a reference
 * pair if the matrix filled by addStemsToMatrix is positive there.
 *
 * @param compMatrix     prediction matrix (must not be empty)
 * @param sequence       sequence used for the complementarity test
 * @param referenceStems reference stems
 * @param scoreLimitLow  lower bound of the score interval
 * @param scoreLimitHigh upper bound of the score interval
 * @param stemLen        required stretch length
 * @param borderLim      number of positions ignored at both borders
 * @param diagLim        minimum distance from the diagonal
 * @param classResult    output: truePos, trueNeg, falsePos, falseNeg
 */
void
computeMatrixMathews2IntervallStemSequence(const Vec<Vec<double> >& compMatrix,
					   const string& sequence,
					   const Vec<Stem>& referenceStems,
					   double scoreLimitLow,
					   double scoreLimitHigh,
					   int stemLen,
					   unsigned int borderLim,
					   unsigned int diagLim,
					   ClassificationResult& classResult)
{
  PRECOND(compMatrix.size() > 0);
  Vec<Vec<double> > stemMatrix(compMatrix.size(), 
			       Vec<double>(compMatrix.size(), 0.0));
  addStemsToMatrix(stemMatrix, referenceStems, 1.0);
  unsigned int tp = 0;
  unsigned int fp = 0;
  unsigned int tn = 0;
  unsigned int fn = 0;  
  // "i + borderLim < size" instead of "i < size - borderLim": the
  // subtraction is done in unsigned arithmetic and would wrap around
  // (causing out-of-range access) if borderLim exceeds the matrix size
  for (unsigned int i = borderLim; i + borderLim < stemMatrix.size(); ++i) {
    for (unsigned int j = borderLim; j + diagLim < i; ++j) {
      // the stretch test is only evaluated for scores inside the interval
      const bool predicted = (compMatrix[i][j] >= scoreLimitLow) 
	&& (compMatrix[i][j] <= scoreLimitHigh)
	&& (isStemStretchSequence(sequence, static_cast<int>(i), static_cast<int>(j),
				  stemLen, static_cast<int>(diagLim)));
      if (stemMatrix[i][j] > 0.0) {
	if (predicted) {
	  ++tp;
	}
	else {
	  ++fn;
	}
      }
      else {
	if (predicted) {
	  ++fp;
	}
	else {
	  ++tn;
	}
      }
    }
  }
  classResult.truePos = tp;
  classResult.trueNeg = tn;
  classResult.falsePos = fp;
  classResult.falseNeg = fn;
}


 /** Returns Mathews coefficient between reference stems and prediction matrix, 
  * computed only for positions which are not a gap character in reference sequence;
  * accuracy2 is tp / (tp + fn + fp).
  *
  * All elements (i, j) with borderLim <= j, j + diagLim < i and
  * i + borderLim < matrix size are classified, unless referenceSequence
  * has gapChar at position i or j. The element is a reference pair if the
  * matrix filled by addStemsToMatrix is positive there, and it is
  * predicted if compMatrix[i][j] >= scoreLimit.
  *
  * @param compMatrix        prediction matrix (must not be empty)
  * @param referenceStems    reference stems
  * @param referenceSequence reference sequence; indexed with matrix
  *                          indices, so it is expected to cover them
  * @param scoreLimit        minimum score of a predicted pair
  * @param accuracy          output: tp / (tp + fp), 0 if undefined
  * @param accuracy2         output: tp / (tp + fn + fp), 0 if undefined
  * @param coverage          output: tp / (tp + fn), 0 if undefined
  * @param wrongPosFrac      output: (fp + fn) divided by the matrix size
  * @param wrongPos          output: fp + fn
  * @param borderLim         number of positions ignored at both borders
  * @param diagLim           minimum distance from the diagonal
  * @param gapChar           gap character of the reference sequence
  * @return result of computeMathews(tp, fp, tn, fn)
  */
 double 
 computeMatrixMathews2(const Vec<Vec<double> >& compMatrix,
		      const Vec<Stem>& referenceStems,
		      const string& referenceSequence,
		      double scoreLimit,
		      double& accuracy,
		      double& accuracy2,
		      double& coverage,
		      double& wrongPosFrac,
		      unsigned int& wrongPos,
		      unsigned int borderLim,
		      unsigned int diagLim,
		      char gapChar)
 {
   PRECOND(compMatrix.size() > 0);
   Vec<Vec<double> > stemMatrix(compMatrix.size(), 
				Vec<double>(compMatrix.size(), 0.0));
   addStemsToMatrix(stemMatrix, referenceStems, 1.0);
   unsigned int tp = 0;
   unsigned int fp = 0;
   unsigned int tn = 0;
   unsigned int fn = 0;
   // "i + borderLim < size" instead of "i < size - borderLim": the
   // subtraction is done in unsigned arithmetic and would wrap around
   // (causing out-of-range access) if borderLim exceeds the matrix size
   for (unsigned int i = borderLim; i + borderLim < stemMatrix.size(); ++i) {
     if (referenceSequence[i] == gapChar) {
       continue;
     }
     for (unsigned int j = borderLim; j + diagLim < i; ++j) {
       if (referenceSequence[j] == gapChar) {
	 continue;
       }
       if (stemMatrix[i][j] > 0.0) {
	 if (compMatrix[i][j] >= scoreLimit) {
	   ++tp;
	 }
	 else {
	   ++fn;
	 }
       }
       else {
	 if (compMatrix[i][j] >= scoreLimit) {
	   ++fp;
	 }
	 else {
	   ++tn;
	 }
       }
     }
   }
   if ((tp + fp) == 0) {
     accuracy = 0.0;
   }
   else {
     accuracy = static_cast<double>(tp) / (tp + fp);
   }
   if ((tp + fp + fn) == 0) {
     accuracy2 = 0.0;
   }
   else {
     accuracy2 = static_cast<double>(tp)/static_cast<double>(tp + fn + fp);
   }
   if ((tp + fn) == 0) {
     coverage = 0.0;
   }
   else {
     coverage = static_cast<double>(tp) / (tp + fn);
   }
   wrongPos = fp + fn; // number of wrong positions
   // divide by alignment length
   wrongPosFrac = static_cast<double>(wrongPos) 
     / static_cast<double>(compMatrix.size()); 
   return computeMathews(tp, fp, tn, fn);
 }

 /** returns Mathews coefficient between reference stems and prediction matrix, 
     computed only for positions which are not a gap character in reference sequence.
 CONTAINS BUG!!! */
double 
computeMatrixMathews(const Vec<Vec<double> >& compMatrix,
		      const Vec<Stem>& referenceStems,
		      const string& referenceSequence,
		      double scoreLimit,
		      double& accuracy,
		      double& coverage,
		      double& wrongPosFrac,
		      unsigned int& wrongPos,
		      unsigned int borderLim,
		      unsigned int diagLim,
		      char gapChar)
{
  PRECOND(compMatrix.size() > 0);
  Vec<Vec<double> > stemMatrix(compMatrix.size(), 
			       Vec<double>(compMatrix.size(), 0.0));
  addStemsToMatrix(stemMatrix, referenceStems, 1.0);
  unsigned int tp = 0;
  unsigned int fp = 0;
  unsigned int tn = 0;
  unsigned int fn = 0;
  for (unsigned int i = borderLim; 
       i + borderLim < stemMatrix.size(); ++i) {
    if (referenceSequence[i] == gapChar) {
      continue;
    }
    unsigned int highestId = borderLim;
    double highestVal = compMatrix[i][highestId];
    for (unsigned int j = borderLim; j + borderLim < stemMatrix.size(); ++j) {
      if (referenceSequence[j] == gapChar) {
	continue;
      }
      if (abs(static_cast<int>(j)-static_cast<int>(i)) < static_cast<int>(diagLim)) {
	continue;
      }
      if (compMatrix[i][j] > highestVal) {
	highestVal = compMatrix[i][j];
	highestId = j;
      }
    }
    unsigned int highestIdRef = borderLim;
    double highestValRef = stemMatrix[i][highestId];
    for (unsigned int j = borderLim; j + borderLim < stemMatrix.size(); ++j) {
      if (referenceSequence[j] == gapChar) {
	continue;
      }
      if (abs(static_cast<int>(j)-static_cast<int>(i)) < static_cast<int>(diagLim)) {
	continue;
      }
      if (stemMatrix[i][j] > highestValRef) {
	highestValRef = stemMatrix[i][j];
	highestIdRef = j;
      }
    }
    if (stemMatrix[i][highestIdRef] >= scoreLimit) {
      if ((highestId == highestIdRef) && (highestVal >= scoreLimit)) {
	++tp;
      }
      else {
	++fn;
      }
    }
    else {
      if (highestVal >= scoreLimit) {
	++fp;
      }
      else {
	++tn;
      }
    }
  }
  if ((tp + fp) == 0) {
    accuracy = 0.0;
  }
  else {
    accuracy = static_cast<double>(tp) / (tp + fp);
  }
  if ((tp + fn) == 0) {
    coverage = 0.0;
  }
  else {
    coverage = static_cast<double>(tp) / (tp + fn);
  }
  wrongPos = fp + fn; // number of wrong positions
  // divide by alignment length
  wrongPosFrac = static_cast<double>(wrongPos) 
    / static_cast<double>(compMatrix.size()); 
  return computeMathews(tp, fp, tn, fn);
}


/** Returns the Mathews coefficient (result of computeMathews(tp, fp, tn, fn)) between
    reference stems and prediction matrix.

    A square 0-initialised reference matrix with compMatrix.size() rows is filled by
    addStemsToMatrix(stemMatrix, referenceStems, 1.0). For each row i with
    borderLim <= i and i + borderLim < size, the column with the largest compMatrix
    entry is searched among the columns of the same range that are at least diagLim
    away from the diagonal (column borderLim is the unfiltered start candidate).
    The row is then classified by the reference entry at that column and by the
    predicted score, both compared against scoreLimit.

    @param accuracy      set to tp/(tp+fp), or 0.0 if tp+fp == 0
    @param coverage      set to tp/(tp+fn), or 0.0 if tp+fn == 0
    @param wrongPos      set to fp+fn
    @param wrongPosFrac  set to wrongPos divided by compMatrix.size()

    CONTAINS BUG! (flag of the original author, kept)
    NOTE(review): one visible weakness is that only the reference entry at the
    predicted column is inspected; a row that has a reference partner elsewhere and
    no score >= scoreLimit is therefore counted as true negative. */
double 
computeMatrixMathews(const Vec<Vec<double> >& compMatrix,
		      const Vec<Stem>& referenceStems,
		      double scoreLimit,
		      double& accuracy,
		      double& coverage,
		      double& wrongPosFrac,
		      unsigned int& wrongPos,
		      unsigned int borderLim,
		      unsigned int diagLim)
{
  PRECOND(compMatrix.size() > 0);
  Vec<Vec<double> > stemMatrix(compMatrix.size(), 
			       Vec<double>(compMatrix.size(), 0.0));
  addStemsToMatrix(stemMatrix, referenceStems, 1.0);
  unsigned int tp = 0;
  unsigned int fp = 0;
  unsigned int tn = 0;
  unsigned int fn = 0;
  const int minDiagDist = static_cast<int>(diagLim);
  for (unsigned int row = borderLim; row + borderLim < stemMatrix.size(); ++row) {
    // locate the best-scoring admissible column of this row
    unsigned int bestCol = borderLim;
    double bestScore = compMatrix[row][borderLim];
    for (unsigned int col = borderLim; col + borderLim < stemMatrix.size(); ++col) {
      const bool nearDiagonal = 
	abs(static_cast<int>(col) - static_cast<int>(row)) < minDiagDist;
      if ((!nearDiagonal) && (compMatrix[row][col] > bestScore)) {
	bestScore = compMatrix[row][col];
	bestCol = col;
      }
    }
    // classify the row
    const bool inReference = (stemMatrix[row][bestCol] >= scoreLimit);
    const bool predicted = (bestScore >= scoreLimit);
    if (inReference && predicted) {
      ++tp;
    }
    else if (inReference) {
      ++fn;
    }
    else if (predicted) {
      ++fp;
    }
    else {
      ++tn;
    }
  }
  accuracy = ((tp + fp) == 0) ? 0.0 : static_cast<double>(tp) / (tp + fp);
  coverage = ((tp + fn) == 0) ? 0.0 : static_cast<double>(tp) / (tp + fn);
  // number of wrong positions, absolute and relative to the alignment length
  wrongPos = fp + fn;
  wrongPosFrac = static_cast<double>(wrongPos) 
    / static_cast<double>(compMatrix.size()); 
  return computeMathews(tp, fp, tn, fn);
}

/** Derives a contact matrix for one individual sequence from the alignment-wide
    matrix, converts it to stems and writes name, sequence and structure to os.

    Steps performed on a copy of compMatrixOrig:
     - all cells with |i-j| < 4 are set to 0
     - rows and columns of positions holding GAP_CHAR are set to 0
     - for non-gap i and j >= i+4: pairs rejected by scorer.isAllowedPair are set to 0;
       allowed pairs with a value >= individualThreshold are set to 1; allowed pairs
       below the threshold keep their value
    Stems are generated with generateStemsFromMatrix(matrix, stemMinLength, 0.5, sequence).

    @param format 1: stems are written with writeStems; 2: the result of
                  stemsToBracketFasta is written; any other value: no structure output
    @return the modified copy of the matrix
    Assumes compMatrixOrig has at least sequence.size() rows and columns (not checked). */
Vec<Vec<double> >
writeIndividualStructure(ostream& os, 
			 const CompensationScorer& scorer,
			 const Vec<Vec<double> >& compMatrixOrig,
			 const string& sequence,
			 const string& sequenceName,
			 double individualThreshold,
			 int format,
			 unsigned int stemMinLength)
{
  const string::size_type seqLen = sequence.size();
  Vec<Vec<double> > indivMatrix = compMatrixOrig;
  // clear the band around the diagonal (distance 0..3)
  for (unsigned int row = 0; row < seqLen; ++row) {
    for (unsigned int offset = 0; (offset < 4) && (row + offset < seqLen); ++offset) {
      const unsigned int col = row + offset;
      indivMatrix[row][col] = 0.0;
      indivMatrix[col][row] = 0.0;
    }
  }
  for (unsigned int row = 0; row < seqLen; ++row) {
    if (sequence[row] == GAP_CHAR) {
      // a gap position cannot pair with anything
      for (unsigned int col = 0; col < seqLen; ++col) {
	indivMatrix[row][col] = 0.0;
	indivMatrix[col][row] = 0.0;
      }
      continue;
    }
    for (unsigned int col = row + 4; col < seqLen; ++col) {
      if (!scorer.isAllowedPair(sequence[row], sequence[col])) {
	indivMatrix[row][col] = 0.0;
	indivMatrix[col][row] = 0.0;
	continue;
      }
      if (indivMatrix[row][col] >= individualThreshold) {
	indivMatrix[row][col] = 1.0;
	indivMatrix[col][row] = 1.0;
      }
    }
  }
  // compute structure
  Vec<Stem> stems = generateStemsFromMatrix(indivMatrix, stemMinLength, 0.5, sequence);
  // write header lines followed by the structure in the requested format
  os << sequenceName << endl;
  os << sequence << endl;
  if (format == 1) {
    writeStems(os, stems);
  }
  else if (format == 2) {
    const string bracketLine = stemsToBracketFasta(stems, sequence);
    os << bracketLine << endl;
  }
  return indivMatrix;
}

/** Writes the individual structure of every sequence of the alignment to os (see
    writeIndividualStructure) and returns the cell-wise average of the individual
    matrices returned by these calls.

    Each entry is preceded by "> structure <n> : " with n counting from 1.

    @param compMatrix alignment-wide matrix; must not be empty because the column
                      count of the result is taken from its first row
    @return average matrix; a zero matrix if the alignment contains no sequences
            (previously every cell was divided by zero in that case) */
Vec<Vec<double> >
writeIndividualStructures(ostream& os, 
			  Vec<Vec<double> > compMatrix, 
			  const CompensationScorer& scorer,
			  const SequenceAlignment& ali, 
			  double individualThreshold,
			  int format,
			  unsigned int stemMinLength)
{
  PRECOND(compMatrix.size() > 0); // compMatrix[0] is read below
  cout << "Starting writeIndividualStructures with threshold: " << individualThreshold << endl;
  Vec<Vec<double> > result(compMatrix.size(), Vec<double>(compMatrix[0].size(), 0.0));
  if (ali.size() == 0) {
    return result; // nothing to write and nothing to average
  }
  for (unsigned int i = 0; i < ali.size(); ++i) {
    os << "> structure " << i + 1 << " : ";
    Vec<Vec<double> > tmpMatrix = writeIndividualStructure(os, scorer, compMatrix, ali.getSequence(i), ali.getName(i), individualThreshold, format, stemMinLength);
    result = matrixAdd(result, tmpMatrix);
  }
  // compute average:
  for (unsigned int i = 0; i < result.size(); ++i) {
    for (unsigned int j = 0; j < result[i].size(); ++j) {
      result[i][j] /= ali.size();
    }
  }
  return result;
}

/** Returns the index of the two-letter word whose letters have the alphabet
    positions id1 and id2: id1 * alphabetSize + id2.
    Example: id1==1 (C), id2==1 (C), alphabet={A,C} -> pairs=AA,AC,CA,CC -> result = 1*2+1=3.
    If id1 or id2 is not a valid position (>= alphabetSize), the value
    alphabetSize*alphabetSize (one past the last valid index) is returned, which is
    the same convention the character-based overload uses for unknown letters.
    FIXED: the result used to be clamped to alphabetSize, so most valid pairs were
    mapped to the same index and the documented example returned 2 instead of 3. */
unsigned int
alphabetPairIndex(unsigned int id1, unsigned int id2, unsigned int alphabetSize)
{
  if ((id1 >= alphabetSize) || (id2 >= alphabetSize)) {
    return alphabetSize * alphabetSize; // invalid letter position
  }
  return (id1 * alphabetSize) + id2;
}

/** Returns the index of the two-letter word c1 c2 with respect to the given alphabet:
    (position of c1) * alphabet.size() + (position of c2).
    Example: c1==C c2==C, alphabet={A,C} -> pairs=AA,AC,CA,CC -> result = 1*2+1=3.
    If one of the characters is not part of the alphabet,
    alphabet.size()*alphabet.size() is returned (one past the last valid index). */
unsigned int
alphabetPairIndex(char c1, char c2, const string& alphabet)
{
  const string::size_type alphabetLen = alphabet.size();
  const string::size_type firstPos = alphabet.find(c1);
  const string::size_type secondPos = alphabet.find(c2);
  const bool bothKnown = (firstPos != string::npos) && (secondPos != string::npos);
  if (!bothKnown) {
    return static_cast<unsigned int>(alphabetLen * alphabetLen);
  }
  return static_cast<unsigned int>(firstPos * alphabetLen + secondPos);
}

/** Returns all words of length two over the given alphabet. The word made of the
    letters alphabet[id1], alphabet[id2] is stored at index id1 * alphabet.size() + id2,
    i.e. the ordering that alphabetPairIndex uses for known letters. */
Vec<string>
generateAlphabetPairs(const string& alphabet)
{
  const unsigned int numPairs = alphabet.size() * alphabet.size();
  const string placeholder("XX");
  Vec<string> pairs(numPairs, placeholder);
  // slot always equals first * alphabet.size() + second
  unsigned int slot = 0;
  for (unsigned int first = 0; first < alphabet.size(); ++first) {
    for (unsigned int second = 0; second < alphabet.size(); ++second) {
      pairs[slot][0] = alphabet[first];
      pairs[slot][1] = alphabet[second];
      ++slot;
    }
  }
  return pairs;
}

/** Adds the letter-pair substitutions observed at the alignment columns p1, p2 to
    the given count matrix.

    For every pair of sequences (i < j) whose letters differ in column p1 or in
    column p2, the pair index (see alphabetPairIndex) of the two letters of sequence i
    and of sequence j is determined; matrix[id1][id2] and matrix[id2][id1] are both
    incremented by 1. Sequence pairs with a letter outside the alphabet are skipped as
    long as matrix.size() does not exceed the "invalid" index returned by
    alphabetPairIndex.

    @param p1, p2   alignment columns; assumed to be valid positions of every sequence
    @param matrix   count matrix, modified in place; assumed to be square (only
                    matrix.size() is checked)

    Sequences are bound by const reference and the letters and pair index of the first
    sequence are determined once per outer iteration, instead of copying both sequences
    and repeating the lookup for every sequence pair. The resulting counts are
    unchanged. */
void
generateMutationMatrix(const SequenceAlignment& ali,
		       unsigned int p1,
		       unsigned int p2,
		       const string& alphabet,
		       Vec<Vec<double> >& matrix)
{
  // the last sequence has no partner with a higher index, hence "i + 1 < size"
  for (unsigned int i = 0; i + 1 < ali.size(); ++i) {
    const string& s1 = ali.getSequence(i);
    const char firstLetter = s1[p1];
    const char secondLetter = s1[p2];
    const unsigned int id1 = alphabetPairIndex(firstLetter, secondLetter, alphabet);
    for (unsigned int j = i+1; j < ali.size(); ++j) {
      const string& s2 = ali.getSequence(j);
      if ((firstLetter == s2[p1]) && (secondLetter == s2[p2])) {
	continue; // identical letter pair: no substitution to count
      }
      const unsigned int id2 = alphabetPairIndex(s2[p1], s2[p2], alphabet);
      if ((id1 < matrix.size()) && (id2 < matrix.size())) {
	matrix[id1][id2] += 1.0;
	matrix[id2][id1] += 1.0;
      }
    }
  }
}

void
generateMutationMatrices(const SequenceAlignment& ali,
			 const Vec<Vec<double> >& refMatrix,
			 const string& alphabet,
			 Vec<Vec<double> >& contactMatrix,
			 Vec<Vec<double> >& noContactMatrix)
{
  unsigned int nn = alphabet.size() * alphabet.size();
  ERROR_IF(contactMatrix.size() != nn, "Contact matrix has bad size!");
  ERROR_IF(noContactMatrix.size() != nn, "non-Contact matrix has bad size!");
  for (unsigned int i = 0; i < ali.getLength(); ++i) {
    for (unsigned int j = i+4; j < ali.getLength(); ++j) {
      if (refMatrix[i][j] > 0.5) {
	generateMutationMatrix(ali, i, j, alphabet, contactMatrix);
      }
      else {
	generateMutationMatrix(ali, i, j, alphabet, noContactMatrix);
      }
    }
  }
}
