#ifndef _INTERVAL_OPERATIONS_
#define _INTERVAL_OPERATIONS_

#include <debug.h>
#include <Vec.h>
#include <Bed.h>
#include <BedChrom.h>
#include <IntervallIntSet.h>
#include <StringTools.h>
#include <algorithm>   // used for set_intersection algorithm

#ifndef NDEBUG
#define NDEBUG
#endif

#ifndef DELIM
#define DELIM "\t"
#endif

class IntervalOperations {

 public:

  typedef BedChrom::index_type index_type;
  typedef string::size_type size_type;

  /** Inner join of two interval sets.
   * For every interval of the first set, one line is written per overlapping
   * interval of the second set found on the chromosome with the same name:
   * the fields of the first interval followed by the fields of the second one.
   * Chromosomes of the first set that are not found in the second set are skipped.
   * @param os stream receiving the joined lines
   * @param bed1 first interval set (drives the iteration)
   * @param bed2 second interval set (queried via findAllOverlapping)
   * @param verbose progress messages are written to cout for levels > 1 (chromosomes) and > 3 (intervals)
   * @param writeName if true, the name of each of the two intervals is written
   * @param writeStrand if true, the strand of each of the two intervals is written
   * @param writeScore if true, the score of the interval from the second set is written
   */
  static void innerJoin(ostream& os, const Bed& bed1, const Bed& bed2,
                        int verbose, bool writeName, bool writeStrand, bool writeScore) {
    const bool logChroms = (verbose > 1);
    const bool logIntervals = (verbose > 3);
    for (size_type chromIdx = 0; chromIdx < bed1.size(); ++chromIdx) {
      const BedChrom& bedChrom = bed1.getChrom(chromIdx);
      if (logChroms) {
        cout << "# Working on chromosome " << bedChrom.getChrom() << endl;
      }
      const size_type matchIdx = bed2.findChromId(bedChrom.getChrom());
      if (matchIdx < bed2.size()) { // otherwise: chromosome unknown to second set
        const BedChrom& bedChrom2 = bed2.getChrom(matchIdx);
        ASSERT(bedChrom2.size() > 0);
        for (size_type row = 0; row < bedChrom.size(); ++row) {
          const index_type lo = bedChrom.getStart(row);
          const index_type hi = bedChrom.getEnd(row);
          if (logIntervals) {
            cout << "# working on interval " << bedChrom.getChrom() << " " << lo << " " << hi << endl;
          }
          const set<size_type> hits = bedChrom2.findAllOverlapping(lo, hi);
          if (logIntervals) {
            cout << "# found: " << hits.size() << endl;
          }
          set<size_type>::const_iterator it = hits.begin();
          while (it != hits.end()) {
            ASSERT(*it < bedChrom2.size());
            // fields of the interval from the first set
            os << bedChrom.getChrom() << "\t" << lo << "\t" << hi;
            if (writeName) {
              os << DELIM << bedChrom.getName(row);
            }
            if (writeStrand) {
              os << DELIM << bedChrom.getStrandString(row);
            }
            // fields of the overlapping interval from the second set
            os << DELIM << bedChrom2.getChrom() << DELIM << bedChrom2.getStart(*it) << "\t" << bedChrom2.getEnd(*it);
            if (writeName) {
              os << DELIM << bedChrom2.getName(*it);
            }
            if (writeStrand) {
              os << DELIM << bedChrom2.getStrandString(*it);
            }
            if (writeScore) {
              os << DELIM << bedChrom2.getScore(*it);
            }
            os << endl;
            ++it;
          }
        }
      }
    }
  }

  /** Inner join with a containment requirement: a pair of overlapping intervals
   * is only written if the interval from the first set lies COMPLETELY within
   * the interval from the second set (start >= start2 and end <= end2).
   * Chromosomes of the first set that are not found in the second set are skipped.
   * @param os stream receiving the joined lines
   * @param bed1 first interval set (drives the iteration)
   * @param bed2 second interval set (queried via findAllOverlapping)
   * @param verbose progress messages are written to cout for levels > 1 (chromosomes) and > 3 (intervals)
   * @param writeName if true, the name of each of the two intervals is written
   * @param writeStrand if true, the strand of each of the two intervals is written
   * @param writeScore if true, the score of the interval from the second set is written
   */
  static void innerJoin1i2(ostream& os, const Bed& bed1, const Bed& bed2,
                           int verbose, bool writeName, bool writeStrand, bool writeScore) {
    for (size_type chromIdx = 0; chromIdx < bed1.size(); ++chromIdx) {
      const BedChrom& bedChrom = bed1.getChrom(chromIdx);
      if (verbose > 1) {
        cout << "# Working on chromosome " << bedChrom.getChrom() << endl;
      }
      const size_type matchIdx = bed2.findChromId(bedChrom.getChrom());
      if (matchIdx < bed2.size()) { // otherwise: chromosome unknown to second set
        const BedChrom& bedChrom2 = bed2.getChrom(matchIdx);
        ASSERT(bedChrom2.size() > 0);
        for (size_type row = 0; row < bedChrom.size(); ++row) {
          const index_type innerStart = bedChrom.getStart(row);
          const index_type innerEnd = bedChrom.getEnd(row);
          if (verbose > 3) {
            cout << "# working on interval " << bedChrom.getChrom() << " " << innerStart << " " << innerEnd << endl;
          }
          const set<size_type> hits = bedChrom2.findAllOverlapping(innerStart, innerEnd);
          if (verbose > 3) {
            cout << "# found: " << hits.size() << endl;
          }
          for (set<size_type>::const_iterator it = hits.begin(); it != hits.end(); ++it) {
            ASSERT(*it < bedChrom2.size());
            const index_type outerStart = bedChrom2.getStart(*it);
            const index_type outerEnd = bedChrom2.getEnd(*it);
            // only report pairs in which the first interval does not reach beyond the second one
            if ((innerStart >= outerStart) && (innerEnd <= outerEnd)) {
              os << bedChrom.getChrom() << "\t" << innerStart << "\t" << innerEnd;
              if (writeName) {
                os << DELIM << bedChrom.getName(row);
              }
              if (writeStrand) {
                os << DELIM << bedChrom.getStrandString(row);
              }
              os << DELIM << bedChrom2.getChrom() << DELIM << outerStart << "\t" << outerEnd;
              if (writeName) {
                os << DELIM << bedChrom2.getName(*it);
              }
              if (writeStrand) {
                os << DELIM << bedChrom2.getStrandString(*it);
              }
              if (writeScore) {
                os << DELIM << bedChrom2.getScore(*it);
              }
              os << endl;
            }
          }
        }
      }
    }
  }

  /** Chooses sample positions inside the interval [start, end):
   * the first `lead` positions (start, start+1, ...), then `midCount` positions
   * spread evenly over the region between the leading and trailing parts, then
   * the last `trail` positions (end-trail, ..., end-1).
   * Precondition: (end - start) > (lead + midCount + trail); this guarantees a
   * stride greater than one in the middle region and hence unique positions.
   * @param start first position of the interval
   * @param end position one past the last sampled position
   * @param lead number of consecutive positions taken at the beginning
   * @param midCount number of positions taken from the middle region
   * @param trail number of consecutive positions taken at the end
   * @return lead + midCount + trail positions in increasing order
   */
  static Vec<int> getSamplePositions(int start, int end, int lead, int midCount, int trail) {
    PRECOND((end-start) > (lead + midCount + trail));
    Vec<int> result;
    for (int i = 0; i < lead; ++i) {
      result.push_back(start + i);
    }
    if (midCount > 0) { // avoids division by zero when no middle samples are requested
      // length of the region between the leading and the trailing part
      // (was computed as start-end-..., which is negative)
      int middle = (end - start - lead - trail);
      // real-valued stride; integer division would truncate before the conversion to double
      double stride = static_cast<double>(middle) / midCount;
      ASSERT(stride >= 1.0);
      for (int i = 0; i < midCount; ++i) {
        result.push_back(start + lead + static_cast<int>((i*stride)));
      }
    }
    for (int i = 0; i < trail; ++i) {
      result.push_back(end - trail + i);
    }
    POSTCOND(static_cast<int>(result.size()) == (lead + midCount + trail));
    POSTCOND(uniqueSet(result).size() == result.size());
    return result;
  }

  /** Writes scores of the second interval set sampled at fixed relative
   * positions of the intervals of the first set.
   * For each sample index t in [0, lead + midCount + trail) a line containing t
   * is written, followed by one line per interval of the first set: the
   * space-separated scores of all intervals of the second set that overlap the
   * t-th sample position (see getSamplePositions) of that interval.
   * Intervals with (end - start) <= lead + midCount + trail are too short and are
   * skipped, as are chromosomes that are not found in the second set.
   * @param os stream receiving the output
   * @param bed1 intervals that define the sample positions
   * @param bed2 intervals whose scores are reported
   * @param lead number of sample positions at the beginning of each interval
   * @param midCount number of sample positions in the middle of each interval
   * @param trail number of sample positions at the end of each interval
   * @param verbose currently not used
   * @param writeName currently not used
   * @param writeStrand currently not used
   * @param writeScore currently not used
   */
  static void sampleScores(ostream& os, const Bed& bed1, const Bed& bed2,
                           int lead, int midCount, int trail,
                           int verbose, bool writeName, bool writeStrand, bool writeScore) {
    int totPos = lead + midCount + trail;
    for (int t = 0; t < totPos; ++t) {
      os << t << endl;
      for (size_type i = 0; i < bed1.size(); ++i) {
        const BedChrom& bedChrom = bed1.getChrom(i);
        size_type otherId = bed2.findChromId(bedChrom.getChrom());
        if (otherId >= bed2.size()) {
          continue;
        }
        const BedChrom& bedChrom2 = bed2.getChrom(otherId);
        ASSERT(bedChrom2.size() > 0);
        for (size_type j = 0; j < bedChrom.size(); ++j) {
          index_type start = bedChrom.getStart(j);
          index_type end = bedChrom.getEnd(j);
          if ((end - start) <= totPos) {
            continue; // too short
          }
          Vec<int> samples = getSamplePositions(start, end, lead, midCount, trail);
          // samples holds totPos entries; the entry for the current sample index is t
          // (indexing with totPos itself would read one past the end)
          ASSERT(t < static_cast<int>(samples.size()));
          int sample = samples[t];
          set<size_type> intervalIds = bedChrom2.findAllOverlapping(sample);
          for (set<size_type>::const_iterator it = intervalIds.begin(); it != intervalIds.end(); ++it) {
            ASSERT(*it < bedChrom2.size());
            os << bedChrom2.getScore(*it) << " ";
          }
          os << endl;
        }
      }
    }
  }


  /** Writes every interval of the first set that overlaps at least one interval
   * of the second set on the chromosome with the same name. A qualifying
   * interval is written exactly once, no matter how many intervals it overlaps.
   * Preferable: have indices built for the second set.
   * @param os stream receiving the selected intervals
   * @param bed1 interval set whose members are written
   * @param bed2 interval set that is tested for overlaps
   * @param writeName if true, the interval name is written
   * @param writeStrand if true, the interval strand is written
   * @param writeScore if true, the interval score is written
   */
  static void intersect(ostream& os, const Bed& bed1, const Bed& bed2,
                        bool writeName, bool writeStrand, bool writeScore) {
    for (size_type chromIdx = 0; chromIdx < bed1.size(); ++chromIdx) {
      const BedChrom& bedChrom = bed1.getChrom(chromIdx);
#ifndef NDEBUG
      cout << "# Working on chromosome " << bedChrom.getChrom() << endl;
#endif
      const size_type matchIdx = bed2.findChromId(bedChrom.getChrom());
      if (matchIdx < bed2.size()) { // otherwise: chromosome unknown to second set
        const BedChrom& bedChrom2 = bed2.getChrom(matchIdx);
        ASSERT(bedChrom2.size() > 0);
        for (size_type row = 0; row < bedChrom.size(); ++row) {
          const index_type lo = bedChrom.getStart(row);
          const index_type hi = bedChrom.getEnd(row);
#ifndef NDEBUG
          cout << "# working on interval " << bedChrom.getChrom() << " " << lo << " " << hi << endl;
#endif
          const set<size_type> hits = bedChrom2.findAllOverlapping(lo, hi);
#ifndef NDEBUG
          cout << "# found: " << hits.size() << endl;
#endif
          if (hits.empty()) {
            continue; // no overlap: interval is not reported
          }
          os << bedChrom.getChrom() << "\t" << lo << "\t" << hi;
          if (writeName) {
            os << DELIM << bedChrom.getName(row);
          }
          if (writeStrand) {
            os << DELIM << bedChrom.getStrandString(row);
          }
          if (writeScore) {
            os << DELIM << bedChrom.getScore(row);
          }
          os << endl;
        }
      }
    }
  }


  /** For every interval of the first set that overlaps intervals of the second
   * set, collects the overlapping second-set intervals, merges them with
   * BedChrom::merge and writes one line per merged region. Each line carries
   * the chromosome of the first-set interval, the start and end of the merged
   * region and, optionally, name, strand and score of the first-set interval.
   * NOTE(review): the merged regions are written as returned by BedChrom::merge;
   * they are not clipped to the boundaries of the first-set interval - confirm
   * that this is intended.
   * Preferable: have indices built for the second set.
   * @param os stream receiving the regions
   * @param bed1 first interval set (drives the iteration)
   * @param bed2 second interval set (queried via findAllOverlapping)
   * @param writeName if true, the name of the first-set interval is written
   * @param writeStrand if true, the strand of the first-set interval is written
   * @param writeScore if true, the score of the first-set interval is written
   * @param verbose for levels > 1, messages about the merge step are written to cout
   */
  static void intersectOverlappingPieces(ostream& os, const Bed& bed1, const Bed& bed2,
                                         bool writeName, bool writeStrand, bool writeScore,
                                         int verbose) {
    for (size_type chromIdx = 0; chromIdx < bed1.size(); ++chromIdx) {
      const BedChrom& bedChrom = bed1.getChrom(chromIdx);
#ifndef NDEBUG
      cout << "# Working on chromosome " << bedChrom.getChrom() << endl;
#endif
      const size_type matchIdx = bed2.findChromId(bedChrom.getChrom());
      if (matchIdx < bed2.size()) { // otherwise: chromosome unknown to second set
        const BedChrom& bedChrom2 = bed2.getChrom(matchIdx);
        ASSERT(bedChrom2.size() > 0);
        for (size_type row = 0; row < bedChrom.size(); ++row) {
          index_type start = bedChrom.getStart(row);
          index_type end = bedChrom.getEnd(row);
#ifndef NDEBUG
          cout << "# working on interval " << bedChrom.getChrom() << " " << start << " " << end << endl;
#endif
          const set<size_type> hits = bedChrom2.findAllOverlapping(start, end);
#ifndef NDEBUG
          cout << "# found: " << hits.size() << endl;
#endif
          if (hits.empty()) {
            continue; // nothing overlaps this interval
          }
          // boundaries of all overlapping intervals of the second set
          Vec<index_type> mStarts;
          Vec<index_type> mEnds;
          set<size_type>::const_iterator it = hits.begin();
          while (it != hits.end()) {
            ASSERT(start < end);
            ASSERT(bedChrom2.getStart(*it) < bedChrom2.getEnd(*it));
            mStarts.push_back(bedChrom2.getStart(*it));
            mEnds.push_back(bedChrom2.getEnd(*it));
            ASSERT(mStarts[mStarts.size()-1] < mEnds[mEnds.size()-1]);
            ++it;
          }
          if (verbose > 1) {
            cout << "# starting merge: " << mStarts.size() << " " << mEnds.size() << endl;
          }
          const Vec<pair<index_type, index_type> > regions = BedChrom::merge(mStarts, mEnds);
          if (verbose > 1) {
            cout << "# finished merge: " << regions.size() << endl;
          }
          for (size_type r = 0; r < regions.size(); ++r) {
            // merged region, annotated with the attributes of the first-set interval
            os << bedChrom.getChrom() << "\t" << regions[r].first << "\t" << regions[r].second;
            if (writeName) {
              os << DELIM << bedChrom.getName(row);
            }
            if (writeStrand) {
              os << DELIM << bedChrom.getStrandString(row);
            }
            if (writeScore) {
              os << DELIM << bedChrom.getScore(row);
            }
            os << endl;
          }
        }
      }
    }
  }



  /* What part of a first set of intervals is covered by a second set of intervals (Galaxy coverage tool)
     Example from Galaxy:
if First query are genes

chr11 5203271 5204877 NM_000518 0 -
chr11 5210634 5212434 NM_000519 0 -
chr11 5226077 5227663 NM_000559 0 -
chr11 5226079 5232587 BC020719  0 -
chr11 5230996 5232587 NM_000184 0 -

and Second query are repeats:

chr11      5203895 5203991 L1MA6     500 +
chr11      5204163 5204239 A-rich    219 +
chr11      5211034 5211167 (CATATA)n 245 +
chr11      5211642 5211673 AT_rich    24 +
chr11      5226551 5226606 (CA)n     303 +
chr11      5228782 5228825 (TTTTTG)n 208 +
chr11      5229045 5229121 L1PA11    440 +
chr11      5229133 5229319 MER41A   1106 +
chr11      5229374 5229485 L2        244 -
chr11      5229751 5230083 MLT1A     913 -
chr11      5231469 5231526 (CA)n     330 +

the Result is the coverage density of repeats in the genes:

chr11 5203271 5204877 NM_000518 0 - 172   0.107098
chr11 5210634 5212434 NM_000519 0 - 164   0.091111
chr11 5226077 5227663 NM_000559 0 -  55   0.034678
chr11 5226079 5232587 BC020719  0 - 860   0.132145
chr11 5230996 5232587 NM_000184 0 -  57   0.035827
  */





  /** Merges the intervals of each chromosome of the given set (BedChrom::merge)
   * and writes the result with all three optional-field flags of
   * BedChrom::write switched off (3-column BED output).
   * The merge is applied to a copy of each chromosome, so the given set is
   * left unchanged.
   * @param os stream receiving the merged intervals
   * @param bed1 interval set to be merged
   */
  static void merge(ostream& os, const Bed& bed1) {
    size_type chromIdx = 0;
    while (chromIdx < bed1.size()) {
      BedChrom merged = bed1.getChrom(chromIdx); // working copy that is modified by merge()
#ifndef NDEBUG
      cout << "# Working on chromosome " << merged.getChrom() << endl;
#endif
      merged.merge();
      merged.write(os, false, false, false);
      ++chromIdx;
    }
  }

  /** Applies BedChrom::mergeNameFast to each chromosome of the given set and
   * writes the result with all three optional-field flags of BedChrom::write
   * switched on.
   * The operation is applied to a copy of each chromosome, so the given set is
   * left unchanged.
   * @param os stream receiving the merged intervals
   * @param bed1 interval set to be merged
   */
  static void mergeName(ostream& os, const Bed& bed1) {
    size_type chromIdx = 0;
    while (chromIdx < bed1.size()) {
      BedChrom merged = bed1.getChrom(chromIdx); // working copy that is modified by mergeNameFast()
#ifndef NDEBUG
      cout << "# Working on chromosome " << merged.getChrom() << endl;
#endif
      merged.mergeNameFast();
      merged.write(os, true, true, true);
      ++chromIdx;
    }
  }
  

  /** Writes every interval of the first set that does NOT overlap any interval
   * of the second set. Intervals on chromosomes that are not found in the
   * second set are always written.
   * Preferable: have indices built for the second set.
   * @param os stream receiving the selected intervals
   * @param bed1 interval set whose members are written
   * @param bed2 interval set that is tested for overlaps
   * @param writeName if true, the interval name is written
   * @param writeStrand if true, the interval strand is written
   * @param writeScore if true, the interval score is written
   */
  static void subtract(ostream& os, const Bed& bed1, const Bed& bed2,
                       bool writeName, bool writeStrand, bool writeScore) {
    for (size_type chromIdx = 0; chromIdx < bed1.size(); ++chromIdx) {
      const BedChrom& bedChrom = bed1.getChrom(chromIdx);
#ifndef NDEBUG
      cout << "# Working on chromosome " << bedChrom.getChrom() << endl;
#endif
      const size_type matchIdx = bed2.findChromId(bedChrom.getChrom());
      // null if the second set has no chromosome of that name
      const BedChrom * other = (matchIdx < bed2.size()) ? &(bed2.getChrom(matchIdx)) : 0;
      for (size_type row = 0; row < bedChrom.size(); ++row) {
        const index_type lo = bedChrom.getStart(row);
        const index_type hi = bedChrom.getEnd(row);
        set<size_type> hits; // stays empty without a matching chromosome
        if (other != 0) {
          hits = other->findAllOverlapping(lo, hi);
#ifndef NDEBUG
          cout << "# found: " << hits.size() << endl;
#endif
        }
        if (!hits.empty()) {
          continue; // overlapped by the second set: interval is dropped
        }
        os << bedChrom.getChrom() << "\t" << lo << "\t" << hi;
        if (writeName) {
          os << DELIM << bedChrom.getName(row);
        }
        if (writeStrand) {
          os << DELIM << bedChrom.getStrandString(row);
        }
        if (writeScore) {
          os << DELIM << bedChrom.getScore(row);
        }
        os << endl;
      }
    }
  }

  /** Annotates pairs of regions, read line by line from a stream, with overlap
   * counts against a third interval set.
   * Each data line must contain two regions (chromosome, start, end) in the
   * given columns. The line is echoed, followed by three additional fields:
   * the number of intervals of the third set overlapping the first region,
   * the number overlapping the second region, and the number of intervals
   * overlapping both regions (only determined if both regions are on the same
   * chromosome, otherwise 0).
   * Empty lines and lines starting with '#' are echoed unchanged.
   * Preferable: have indices built for the third set.
   * @param is stream providing the region pairs
   * @param os stream receiving the annotated lines
   * @param chromCol column (zero-based token index) of the chromosome of the first region
   * @param startCol column of the start of the first region
   * @param endCol column of the end of the first region
   * @param chromCol2 column of the chromosome of the second region
   * @param startCol2 column of the start of the second region
   * @param endCol2 column of the end of the second region
   * @param bed3 interval set used for the annotation
   * @param writeName currently not used
   * @param writeStrand currently not used
   * @param writeScore currently not used
   */
  static void annotatePairs(istream& is, ostream& os,
                            size_type chromCol, size_type startCol, size_type endCol,
                            size_type chromCol2, size_type startCol2, size_type endCol2,
                            const Bed& bed3,
                            bool writeName, bool writeStrand, bool writeScore) {
    // largest token index that is read from each data line
    const size_type cols[] = { chromCol, startCol, endCol, chromCol2, startCol2, endCol2 };
    const size_type maxCol = *std::max_element(cols, cols + (sizeof(cols) / sizeof(cols[0])));
    long lineCount = 0;
    while (is) {
      string line = getLine(is);
      if (!is && line.empty()) {
        break; // read attempt failed without data (end of input): do not echo a spurious empty line
      }
      lineCount++;
      if (line.size() == 0 || line[0] == '#') {
        os << line << endl;
        continue;
      }
      vector<string> tokens = getTokens(line);
      ERROR_IF(tokens.size() <= endCol2, "end column 2 is larger than number of words in line "
               + ltos(lineCount));
      // the remaining columns are indexed as well and must not exceed the token count either
      ERROR_IF(tokens.size() <= maxCol, "column index is larger than number of words in line "
               + ltos(lineCount));
      string chrom = tokens[chromCol];
      index_type start = stoi(tokens[startCol]);
      index_type end = stoi(tokens[endCol]);
      string chrom2 = tokens[chromCol2];
      index_type start2 = stoi(tokens[startCol2]);
      index_type end2 = stoi(tokens[endCol2]);
      // chromosomes of the third set matching the two regions (null if not present)
      const BedChrom * bedChrom3 = 0;
      const BedChrom * bedChrom3B = 0;
      size_type otherId = bed3.findChromId(chrom);
      if (otherId < bed3.size()) {
        bedChrom3 = &(bed3.getChrom(otherId));
      }
      size_type otherIdB = bed3.findChromId(chrom2);
      if (otherIdB < bed3.size()) {
        bedChrom3B = &(bed3.getChrom(otherIdB));
      }
      set<size_type> intervalIds;
      set<size_type> intervalIds2;
      Vec<size_type> intersectingSet;
      if (bedChrom3 != 0) {
        intervalIds = bedChrom3->findAllOverlapping(start, end);
      }
      if (bedChrom3B != 0) {
        intervalIds2 = bedChrom3B->findAllOverlapping(start2, end2);
      }
      if (chrom == chrom2) {
        // interval ids are only comparable if they refer to the same chromosome
        ASSERT(bedChrom3 == bedChrom3B); // pointers must be same
        set_intersection(intervalIds.begin(), intervalIds.end(),
                         intervalIds2.begin(), intervalIds2.end(), back_inserter(intersectingSet));
      }
      os << line << DELIM << intervalIds.size() << DELIM << intervalIds2.size() << DELIM
         << intersectingSet.size() << endl;
    }
  }
  
  
};

#endif
