/* BEGIN software license
 *
 * MsXpertSuite - mass spectrometry software suite
 * -----------------------------------------------
 * Copyright(C) 2009, ..., 2018 Filippo Rusconi
 *
 * http://www.msxpertsuite.org
 *
 * This file is part of the MsXpertSuite project.
 *
 * The MsXpertSuite project is the successor of the massXpert project. This
 * project now includes various independent modules:
 *
 * - massXpert, model polymer chemistries and simulate mass spectrometric data;
 * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * END software license
 */


////////////////////////////// Stdlib includes


/////////////////////// Qt includes
#include <QByteArrayView>
#include <QUuid>


/////////////////////// Local includes
#include "MsXpS/libXpertMassCore/Sequence.hpp"
#include "MsXpS/libXpertMassCore/PolChemDef.hpp"
#include "MsXpS/libXpertMassCore/IndexRangeCollection.hpp"

namespace MsXpS
{
namespace libXpertMassCore
{


/*!

\class MsXpS::libXpertMassCore::Sequence
\inmodule libXpertMassCore
\ingroup PolChemDefBuildingdBlocks
\inheaderfile Sequence.hpp

\brief The Sequence class provides abstractions to work with
a simple sequence of \l{Monomer}s.

A sequence of monomers is a vector of fully qualified \l{Monomer} instances
allocated on the heap.
*/


/*!
\variable MsXpS::libXpertMassCore::Sequence::mcsp_polChemDef

\brief The \l PolChemDef polymer chemistry definition that is the context in
which the Sequence exists.
*/

/*!
\variable MsXpS::libXpertMassCore::Sequence::m_monomers

\brief Vector of allocated \l Monomer instances.
*/

/*!
\variable MsXpS::libXpertMassCore::Sequence::m_isValid

\brief The validity status of this Sequence instance.
*/


/*!
\brief Constructs a totally empty Sequence.

No polymer chemistry definition is set and no Monomer is stored. The instance
is meant to start out as an invalid object (presumably through the default
value given to m_isValid in the class declaration).
*/
Sequence::Sequence() = default;

/*!
\brief Constructs a Sequence in the context of the \a pol_chem_def_csp polymer
chemistry definition using the \a sequence_text representation of a Monomer
sequence.

The \a sequence_text is a concatenation of monomer codes. That text is
immediately converted into Monomer instances by makeMonomers(), using \a
pol_chem_def_csp as the reference PolChemDef. The Monomer instances are stored
in the member container in the order of the codes in \a sequence_text.

The Sequence is set to valid only if makeMonomers() succeeded, that is, if no
monomer code failed to convert \e and the polymer chemistry definition was
usable. In all other cases the Sequence is set to invalid (query with
isValid()).

\sa makeMonomers()
*/
Sequence::Sequence(PolChemDefCstSPtr pol_chem_def_csp,
                   const QString &sequence_text)
  : mcsp_polChemDef(pol_chem_def_csp)
{
  // qDebug() << "The sequence_text:" << sequence_text;

  std::vector<std::size_t> failing_indices;

  const int monomer_count =
    makeMonomers(sequence_text, /*reset*/ true, failing_indices);

  if(!failing_indices.empty())
    {
      QString indices_text;

      for(std::size_t index : failing_indices)
        indices_text += QString("%1, ").arg(index);

      qCritical()
        << "There were errors making the monomers at the following indices:"
        << indices_text;

      m_isValid = false;
      return;
    }

  // makeMonomers() may also fail without reporting any failing index: it
  // returns -1 right away when the PolChemDef is nullptr or not valid. Do not
  // declare the Sequence valid in that situation.
  m_isValid = (monomer_count >= 0);
}

/*!
\brief Constructs this Sequence instance as a copy of \a other.

The polymer chemistry definition pointer is shared with \a other. Each Monomer
of \a other is handed, in order, to storeMonomer() so that it gets registered
in this Sequence (the copy is documented as deep; that depends on what
storeMonomer() does with the pointer it receives).

The new instance is then validated and the result is stored in m_isValid; a
critical message listing the errors is logged if the validation fails.
*/
Sequence::Sequence(const Sequence &other)
  : mcsp_polChemDef(other.mcsp_polChemDef)
{
  for(auto iter = other.m_monomers.cbegin(); iter != other.m_monomers.cend();
      ++iter)
    storeMonomer(*iter);

  // qDebug() << "The number of Monomer instances:" << m_monomers.size();

  ErrorList validation_errors;
  m_isValid = validate(&validation_errors);

  if(m_isValid)
    return;

  qCritical() << "The copy-constructed Sequence is not valid, with errors:"
              << Utils::joinErrorList(validation_errors, ", ");
}

/*!
\brief Destructs this Sequence.

The container of Monomer shared pointers is explicitly emptied.
*/
Sequence::~Sequence()
{
  // Release this Sequence's references to its Monomer instances.
  this->m_monomers.clear();
}

//////////////// POLYMER CHEMISTRY DEFINITION /////////////////////


/*!
\brief Sets \a pol_chem_def_csp as the polymer chemistry definition of this
Sequence.

Only the member pointer is replaced; the Monomer container and the validity
status are left untouched.
*/
void
Sequence::setPolChemDefCstSPtr(PolChemDefCstSPtr pol_chem_def_csp)
{
  this->mcsp_polChemDef = pol_chem_def_csp;
}

/*!
\brief Returns the polymer chemistry definition of this Sequence (a copy of
the member shared pointer, which might be nullptr).
*/
PolChemDefCstSPtr
Sequence::getPolChemDef() const
{
  PolChemDefCstSPtr pol_chem_def_csp = mcsp_polChemDef;
  return pol_chem_def_csp;
}

//////////////// MONOMERS TEXT / INSTANCES /////////////////////

/*!
\brief Sets the \a sequence of Monomer codes to this Sequence.

The member Monomer container and the Uuid/Monomer pairs container are first
cleared. Then \a sequence is handed to appendSequence(), which converts the
codes into newly allocated Monomer instances appended in order to the member
container of Monomer instances.

No verification is performed on \a sequence beforehand. If there are indices of
the \a sequence that failed converting to Monomer instances, they are stored in
\a failing_indices. Note that when \a sequence is empty, appendSequence()
returns immediately and \a failing_indices is left untouched.

After setting the member data, the instance is validated and the result is set
to m_isValid. A critical message is logged only if the validation failed.

A nullptr or invalid polymer chemistry definition is fatal.

Returns the value returned by appendSequence(): -1 if an error occurred, or
the count of Monomer instances in the sequence.

\sa appendSequence()
*/
int
Sequence::setSequence(const QString &sequence,
                      std::vector<std::size_t> &failing_indices)
{
  if(mcsp_polChemDef == nullptr)
    {
      qFatalStream() << "Programming error. The PolChemDef pointer is nullptr.";
    }

  if(!mcsp_polChemDef->isValid())
    {
      qFatalStream() << "Programming error. The PolChemDef is not valid.";
    }

  m_monomers.clear();
  m_uuidMonomerPairs.clear();

  const int result = appendSequence(sequence, failing_indices);

  ErrorList error_list;

  m_isValid = validate(&error_list);

  // Only report when the validation actually failed.
  if(!m_isValid)
    {
      qCritical() << "The Sequence is not valid, with errors:"
                  << Utils::joinErrorList(error_list, ", ");
    }

  return result;
}

/*!
\brief Appends the \a sequence of Monomer codes to this Sequence.

No verification is performed on \a sequence. After removal of its spaces, the
codes are immediately converted by makeMonomers() into newly allocated Monomer
instances appended in order to the m_monomers container. If there are indices
of the \a sequence that failed converting to Monomer instances, they are stored
in \a failing_indices.

If \a sequence is empty, nothing is done: 0 is returned, \a failing_indices is
left untouched and the instance is not re-validated.

Otherwise, after setting the member data, the instance is validated and the
result is set to m_isValid. A critical message is logged only if the validation
failed.

A nullptr or invalid polymer chemistry definition is fatal.

Returns the value returned by makeMonomers(): -1 if an error occurred, or the
count of Monomer instances in the member container.

\sa makeMonomers()
*/
int
Sequence::appendSequence(const QString &sequence,
                         std::vector<std::size_t> &failing_indices)
{
  if(mcsp_polChemDef == nullptr)
    qFatalStream() << "The PolChemDef pointer is nullptr!";

  if(!mcsp_polChemDef->isValid())
    qFatalStream() << "The PolChemDef is not valid!";

  if(sequence.isEmpty())
    return 0;

  const QString unspacified_sequence = unspacifySequence(sequence);

  const int result =
    makeMonomers(unspacified_sequence, /*reset*/ false, failing_indices);

  if(result <= 0)
    {
      qCritical() << "The sequence could not be converted into Monomer "
                     "instances (error or empty sequence).";
    }

  ErrorList error_list;

  m_isValid = validate(&error_list);

  // Only report when the validation actually failed.
  if(!m_isValid)
    {
      qCritical() << "The Sequence is not valid, with errors:"
                  << Utils::joinErrorList(error_list, ", ");
    }

  return result;
}

/*!
\brief Returns the textual representation of this Sequence, that is, the codes
of all the Monomer instances of the member container, concatenated in order.

An empty Sequence yields an empty string.
*/
QString
Sequence::getSequence() const
{
  QString codes_text;

  for(auto iter = m_monomers.cbegin(); iter != m_monomers.cend(); ++iter)
    codes_text += (*iter)->getCode();

  return codes_text;
}

/*!
\brief Returns a string with the codes of the Monomer instances found in the
member container, concatenated in order, for a range of indices.

The returned string only contains the sequence of monomer codes for Monomer
instances included in the indices range [\a start_index -- \a stop_index]
(inclusively) in this Sequence's container of Monomer instances.

If \a stop_index is greater than or equal to size(), the range is clamped to
the last Monomer of the Sequence. This holds for any value of \a stop_index,
including the maximum value of std::size_t. If \a start_index is not less than
size(), the returned string is empty.

If the Sequence is empty, an empty string is returned. A \a start_index
greater than \a stop_index is fatal.

If \a with_modif is true, the Modif instances associated to \l{Monomer}s are
also output to the string. The form of the string is, in this case,

\code
Thr<Phosphorylation>
\endcode

A Monomer bearing more than one Modif is output once per Modif, each time as
the code followed by the name of the Modif between angle brackets.
*/
QString
Sequence::getSequence(std::size_t start_index,
                      std::size_t stop_index,
                      bool with_modif) const
{
  const std::size_t monomer_count = size();

  if(!monomer_count)
    {
      return QString();
    }

  if(start_index > stop_index)
    qFatalStream() << "Programming error, please order the indices: ["
                   << start_index << "-" << stop_index << "]";

  // We want the last index to be included in the range, so the loop bound is
  // one past stop_index. Clamp *before* incrementing: computing stop_index + 1
  // first would wrap around to 0 when stop_index is the maximum std::size_t
  // value and silently produce an empty string.
  const std::size_t local_stop =
    (stop_index >= monomer_count) ? monomer_count : stop_index + 1;

  QString text;

  for(std::size_t iter = start_index; iter < local_stop; ++iter)
    {
      const MonomerSPtr &monomer_sp = m_monomers.at(iter);

      // qDebug() << "Iterating in Monomer:" << monomer_sp->getCode();

      if(with_modif && monomer_sp->isModified())
        {
          for(const ModifSPtr &modif_sp : monomer_sp->getModifsCstRef())
            text += QString("%1<%2>")
                      .arg(monomer_sp->getCode())
                      .arg(modif_sp->getName());
        }
      else
        text += monomer_sp->getCode();
    }

  return text;
}

/*!
\brief Returns a string with the codes of the Monomer instances found in the
member container, for each of the \l{IndexRange} instances contained in \a
index_ranges.

The ranges are processed in the order of the collection and the text obtained
for each of them is appended to the returned string.

If \a with_modif is true, the modification(s) associated to \l{Monomer}s are
also output to the string. The form of the string is, in this case,

\code
Thr<Phosphorylation>
\endcode

If \a delimited_regions is true, the text of each range is output on its own
line, prefixed with the range expressed as positions (indices + 1), like so:

\code
Region [1-10]: <sequence text>
\endcode

Otherwise, the texts of the ranges are simply concatenated.

\sa IndexRange::positionsAsText()
*/
QString
Sequence::getSequence(const IndexRangeCollection &index_ranges,
                      bool with_modif,
                      bool delimited_regions) const
{
  QString assembled_text;

  for(const IndexRange *range_p : index_ranges.getRangesCstRef())
    {
      const QString range_sequence =
        getSequence(range_p->m_start, range_p->m_stop, with_modif);

      if(!delimited_regions)
        {
          assembled_text += range_sequence;
          continue;
        }

      // Positions are indices + 1.
      assembled_text += QString("Region [%1-%2]: %3\n")
                          .arg(range_p->m_start + 1)
                          .arg(range_p->m_stop + 1)
                          .arg(range_sequence);
    }

  // No trailing newline is appended to the whole text.

  return assembled_text;
}

/*!
\brief Allocates all the Monomer instances to describe this Sequence's string
representation of monomer codes.

This function parses the \a sequence_text Monomer codes string and, for
each encountered code, creates a \l Monomer instance and adds it to the member
container of Monomer instances.

If \a reset is true, the member container of Monomer instances is reset before
the work is done. Any error that might occur is stored as the index of the
failing Monomer code in the \a failing_indices container.

The allocation of each Monomer instance based on its code is performed by
looking at the reference Monomer in the member polymer chemistry definition..

Because the m_monomerText member string of Monomer codes does not document any
monomer modification, no modifications are handled in this function.

Returns the count of Monomer instances set to the list or -1 if an error
occurred.
*/
int
Sequence::makeMonomers(const QString &sequence_text,
                       bool reset,
                       std::vector<std::size_t> &failing_indices)
{
  if(mcsp_polChemDef == nullptr || mcsp_polChemDef.get() == nullptr ||
     !mcsp_polChemDef->isValid())
    {
      qCritical() << "The PolChemDef pointer is nullptr or not valid!";
      m_isValid = false;
      return -1;
    }

  if(reset)
    m_monomers.clear();

  failing_indices.clear();

  QString local_sequence_text = unspacifySequence(sequence_text);

  // qDebug() << "Sequence:" << local_sequence_text;

  std::size_t index = 0;
  int ret           = -1;
  QString err;
  QString code;

  ret = nextCode(local_sequence_text, code, index, err);

  while(true)
    {
      if(ret < 0)
        {
          // There was an error in the parsed code. Store the index.

          failing_indices.push_back(index);
          ++index;
          ret = nextCode(local_sequence_text, code, index, err);

          continue;
        }

      if(ret == 0)
        break;

      Monomer monomer(mcsp_polChemDef, "", code);

      MonomerSPtr pol_chem_def_monomer_csp =
        mcsp_polChemDef->getMonomerCstSPtrByCode(code);

      if(pol_chem_def_monomer_csp == nullptr)
        {
          qWarning() << "Monomer:" << code
                     << "was not found in the monomer reference list.";

          failing_indices.push_back(index);
          ++index;
          ret = nextCode(local_sequence_text, code, index, err);
          continue;
        }
      else
        {
          // Fully initialize the monomer with that found in the PolChemDef.
          monomer = *pol_chem_def_monomer_csp;
          storeMonomer(std::make_shared<Monomer>(monomer));

          // qDebug() << "The newly created Monomer has masses:"
          //     << monomer.getMass(Enums::MassType::MONO) << "-"
          //     << monomer.getMass(Enums::MassType::AVG);
        }

      ++index;
      ret = nextCode(local_sequence_text, code, index, err);
    }
  // End of
  // while(true)

  if(failing_indices.size())
    return -1;

  if(ret == -1)
    return -1;

  return m_monomers.size();
}

/*!
\brief Returns a const reference to this Sequence's container of \l Monomer
shared pointers.
*/
const std::vector<MonomerSPtr> &
Sequence::getMonomersCstRef() const
{
  const std::vector<MonomerSPtr> &monomers_cst_ref = m_monomers;
  return monomers_cst_ref;
}

/*!
\brief Returns a reference to the Monomer container.
*/
std::vector<MonomerSPtr> &
Sequence::getMonomersRef()
{
  return m_monomers;
}

// /*!
// \brief Returns a reference to this Sequence's container of \l Monomer
// instances.
// */
// std::vector<MonomerSPtr> &
// Sequence::getMonomersRef()
// {
//  return m_monomers;
// }

//////////////// MONOMER ACCESSING FUNCTIONS /////////////////////

/*!
\brief Returns, as a const raw pointer, the Monomer instance located at \a
index in this Sequence's container of Monomer instances.

The Sequence retains ownership of the Monomer. An index that is out of bounds
is fatal.
*/
MonomerCstRPtr
Sequence::getMonomerCstRPtrAt(std::size_t index) const
{
  const bool is_in_bounds = index < m_monomers.size();

  if(!is_in_bounds)
    qFatalStream() << "Index is out of bounds.";

  const MonomerSPtr &monomer_sp = m_monomers.at(index);

  return monomer_sp.get();
}

/*!
\brief Returns, as a raw pointer, the Monomer instance located at \a index in
this Sequence's container of Monomer instances.

The Sequence retains ownership of the Monomer. An index that is out of bounds
is fatal.
*/
MonomerRPtr
Sequence::getMonomerRPtrAt(std::size_t index)
{
  const bool is_in_bounds = index < m_monomers.size();

  if(!is_in_bounds)
    qFatalStream() << "Index is out of bounds.";

  const MonomerSPtr &monomer_sp = m_monomers.at(index);

  return monomer_sp.get();
}

/*!
\brief Returns the shared pointer to the Monomer instance located at \a index
in this Sequence's container of Monomer instances.

This is the const-qualified accessor; the pointer is returned as a MonomerSPtr.

An index that is out of bounds is fatal (the index and the Monomer count are
reported).
*/
MonomerSPtr
Sequence::getMonomerCstSPtrAt(std::size_t index) const
{
  const std::size_t monomer_count = m_monomers.size();

  if(!(index < monomer_count))
    qFatalStream() << "Programming error. Index is out of bounds:" << index
                   << "with monomer count:" << m_monomers.size();

  MonomerSPtr monomer_sp = m_monomers.at(index);

  return monomer_sp;
}

/*!
\brief Returns the shared pointer to the Monomer instance located at \a index
in this Sequence's container of Monomer instances.

An index that is out of bounds is fatal.
*/
MonomerSPtr
Sequence::getMonomerSPtrAt(std::size_t index)
{
  const bool is_in_bounds = index < m_monomers.size();

  if(!is_in_bounds)
    qFatalStream() << "Index is out of bounds.";

  MonomerSPtr monomer_sp = m_monomers.at(index);

  return monomer_sp;
}

/*!
\brief Returns the index of \a monomer_sp in this Sequence's container of
Monomer instances.

The container is scanned from its beginning and the shared pointers are
compared, that is, the returned index is that of the first entry pointing to
the very \e same Monomer object, not to an equivalent one.

If the Monomer is found, \a ok is set to true and its index is returned. If it
is not found, \a ok is set to false and 0 is returned. Since 0 is also a valid
index, always check \a ok.
*/
std::size_t
Sequence::monomerIndex(MonomerSPtr monomer_sp, bool &ok) const
{
  const std::size_t monomer_count = m_monomers.size();

  for(std::size_t iter = 0; iter < monomer_count; ++iter)
    {
      if(m_monomers[iter] == monomer_sp)
        {
          ok = true;
          return iter;
        }
    }

  ok = false;
  return 0;
}

/*!
\brief Returns the index of \a monomer_csp in this Sequence's container of
Monomer instances.

The container is scanned from its beginning and the shared pointers are
compared, that is, the returned index is that of the first entry pointing to
the very \e same Monomer object, not to an equivalent one.

If the Monomer is found, \a ok is set to true and its index is returned. If it
is not found, \a ok is set to false and 0 is returned. Since 0 is also a valid
index, always check \a ok.
*/
std::size_t
Sequence::monomerIndex(MonomerCstSPtr monomer_csp, bool &ok) const
{
  const std::size_t monomer_count = m_monomers.size();

  for(std::size_t iter = 0; iter < monomer_count; ++iter)
    {
      if(m_monomers[iter] == monomer_csp)
        {
          ok = true;
          return iter;
        }
    }

  ok = false;
  return 0;
}

/*!
\brief Returns the index of \a monomer_crp in this Sequence's container of
Monomer instances.

The container is scanned from its beginning and the raw pointer managed by each
entry is compared to \a monomer_crp, that is, the returned index is that of the
first entry pointing to the very \e same Monomer object, not to an equivalent
one.

If the Monomer is found, \a ok is set to true and its index is returned. If it
is not found, \a ok is set to false and 0 is returned. Since 0 is also a valid
index, always check \a ok.

A nullptr \a monomer_crp is fatal.
*/
std::size_t
Sequence::monomerIndex(MonomerCstRPtr monomer_crp, bool &ok) const
{
  if(monomer_crp == nullptr)
    qFatalStream() << "Programming error. Pointer cannot be nullptr.";

  const std::size_t monomer_count = m_monomers.size();

  for(std::size_t iter = 0; iter < monomer_count; ++iter)
    {
      if(m_monomers[iter].get() == monomer_crp)
        {
          ok = true;
          return iter;
        }
    }

  ok = false;
  return 0;
}

/*!
\brief Seeks the next Monomer code occurring in the \a sequence string of
Monomer codes.

This function starts looking in \a sequence at position \a index. A code is
made of one character that is not lowercase, followed by lowercase characters,
with a total length that cannot exceed the code length defined in the member
polymer chemistry definition.

Both \a code and \a err are cleared upon entering the function. On success, the
found Monomer code is stored in \a code. If a character is found that cannot
belong to a Monomer code (a non-letter character, or a lowercase character as
the first character of the code), that character is stored in \a err and \a
code remains empty.

\a index is updated as follows:

\list
\li on success, it is set to the index of the last character of the parsed
code, unless the parsing stopped because the end of \a sequence was reached, in
which case it is set one character past that (that is, to the length of \a
sequence);
\li if a non-letter character was encountered, it is set to the index of that
character;
\li if the first character was lowercase, it is left unchanged.
\endlist

In all the cases, the caller is thus expected to increment \a index by 1
before calling this function again.

Returns the count of characters that make \a code (0 if there was nothing left
to parse), or -1 if an error occurred. Note that the return type is
std::size_t: the -1 error value is thus returned converted to that unsigned
type, and needs to be converted back to a signed integer to be tested as a
negative value (see makeMonomers()).

A nullptr or invalid polymer chemistry definition is fatal.
*/
std::size_t
Sequence::nextCode(const QString &sequence,
                   QString &code,
                   std::size_t &index,
                   QString &err)
{
  if(mcsp_polChemDef == nullptr || mcsp_polChemDef.get() == nullptr ||
     !mcsp_polChemDef->isValid())
    qFatalStream()
      << "Programming error. It is not possible that the PolChemDef be "
         "undefined or invalid.";

  // The code being assembled, one character at a time.
  QString new_code;
  // Offset, relative to index, of the character currently being examined.
  std::size_t iter = 0;

  // We get a sequence of monomer codes (like "LysArgGlu" for example)
  // and we have to return the next code starting from index. Note
  // that the sequence must not contain invalid characters. The
  // invalid character, if any, is placed in err for further scrutiny by
  // the caller.

  // Returns the count of actually parsed characters in the string
  // new_code (copied to 'code' param). If an error occurs -1 is
  // returned and the faulty character is copied in 'err'. 'index' is
  // updated with the index of the last valid character parsed for
  // current code.

  code.clear();
  err.clear();

  std::size_t sequence_length = sequence.length();

  while(1)
    {
      if(iter >= static_cast<std::size_t>(mcsp_polChemDef->getCodeLength()))
        {
          // Because we have progressed farther than authorized by
          // the number of characters allowed in the monomer codes
          // of this polymer chemistry definition, we decrement iter
          // and break the loop... Later in this function, we'll set
          // the proper index in the sequence where next parsing run
          // should occur (the calling function will increment
          // index by one).

          --iter;
          break;
        }

      // End of the sequence: stop parsing. Note that iter is not decremented
      // here, so index ends up one past the last character of the code (or
      // unchanged if no character was parsed at all).
      if(iter + index >= sequence_length)
        break;

      QChar curChar = sequence.at(iter + index);

      if(!curChar.isLetter())
        {
          //   qDebug() << __FILE__ << __LINE__
          //    << "The character is not a letter:"
          //    << curChar;

          err = curChar;

          // The non-Letter character might be '/', which would be
          // perfectly fine, as we use it to symbolize the actual
          // cleavage site. Which means that we will continue
          // parsing the rest of the string : we have to give the
          // current position back to the caller in the index
          // variable for the next call to this function to start at
          // next character (not falling back to '/', which would
          // make us enter in an infinite loop).

          index = index + iter;

          // Converted to the std::size_t return type.
          return -1;
        }

      bool isLower = (curChar.category() == QChar::Letter_Lowercase);

      if(iter == 0)
        {
          if(isLower)
            {
              // The first character of a monomer code cannot be lowercase.
              // index is left unchanged: the caller increments it to skip
              // the faulty character.

              err = curChar;

              // Converted to the std::size_t return type.
              return -1;
            }
          else
            {
              // Good, first char is not lowercase.
              new_code += curChar;
            }
        }
      else //(iter != 0)
        {
          // We are not in our first iteration. So either the current
          // character is lowercase and we are just continuing to
          // iterate into a multi-char monomer code, or the current
          // character is not lowercase, in which case we are starting to
          // iterate in a new monomer code.

          if(isLower)
            new_code += curChar;
          else
            {
              // Decrement iter, because this round was for nothing:
              // we had "invaded" the next monomer code in sequence,
              // which we must not do.

              --iter;
              break;
            }
        }

      ++iter;
    }

  // We finished parsing at most codeLength characters out of
  // sequence, so we have a valid code in the 'new_code' variable (it is
  // empty if there was nothing left to parse). We can also compute a new
  // index position in the sequence and return the number of characters
  // that we effectively parsed. Note that the caller will be responsible
  // for incrementing the 'index' value by one character unit so as not to
  // reparse the last character of the returned 'code'.

  index = index + iter;
  code  = new_code;
  err.clear();

  return code.length();
}

//////////////// MONOMER HANDLING FUNCTIONS /////////////////////

/*!
\brief Inserts a newly allocated copy of \a monomer at index \a index.

If \a index is equal to the size of the Monomer container, the new Monomer is
stored at the end of the container. If \a index is less than that size, the new
Monomer is stored at \a index.

An \a index greater than the size of the Monomer container is fatal.

Returns the string returned by storeMonomer(), that is, the Uuid string
corresponding to the new Monomer.
*/
QString
Sequence::insertMonomerAt(const Monomer &monomer, std::size_t index)
{
  const std::size_t monomer_count = size();

  if(index > monomer_count)
    {
      qFatalStream() << "Programming error. Index is out of bounds:" << index;

      // Should never reach this point.
      return QString();
    }

  // Genuine insertion inside of the container.
  if(index < monomer_count)
    return storeMonomer(std::make_shared<Monomer>(monomer), index);

  // index == monomer_count: store at the end of the container.
  return storeMonomer(std::make_shared<Monomer>(monomer));
}

/*!
\brief Removes the Monomer instance at index \a index from this Sequence's list
of Monomer instances.

An index that is out of bounds is fatal.

Returns true.
*/
bool
Sequence::removeMonomerAt(std::size_t index)
{
  //  qDebug() << "20250514 - Asking to remove Monomer at index" << index;

  if(index >= size())
    qFatalStream() << "Programming error. Index is out of bounds.";

  // Some controls are in order: we have to check that at index, there is
  // a MonomerSPtr that is present both in m_monomers and in
  // m_uuidMonomerPairs.

  MonomerSPtr monomer_sp = m_monomers.at(index);

  qDebug() << "20250514 - The Monomer being removed at index:" << index
           << "is:" << monomer_sp->getName()
           << "with modification status:" << monomer_sp->isModified();

  QString uuid = getUuidForMonomer(monomer_sp);
  if(uuid.isEmpty())
    qFatalStream()
      << "Inconsistency between m_monomers and m_uuidMonomerPairs.";

  std::vector<UuidMonomerWPtrPair>::const_iterator the_iterator_cst =
    std::find_if(m_uuidMonomerPairs.cbegin(),
                 m_uuidMonomerPairs.cend(),
                 [uuid](const UuidMonomerWPtrPair &the_pair) {
                   // Do not query the monomer_sp managed object because it can
                   // be nullptr!
                   return the_pair.first == uuid;
                 });

  if(the_iterator_cst == m_uuidMonomerPairs.cend())
    qFatalStream()
      << "Inconsistency between m_monomers and m_uuidMonomerPairs.";

  m_uuidMonomerPairs.erase(the_iterator_cst);
  m_monomers.erase(m_monomers.begin() + index);

  //  qDebug() << "20250514 - Done removing Monomer.";

  return true;
}

/*!
\brief Modifies the Monomer instance at index \a index with a Modif instance
that is created on the basis of \a modif_name.

If \a override is set to true, then the modification occurs even if the Monomer
at \index was already modified max count times.

An index that is out of bounds is fatal.

Returns true.

\sa Modif::m_maxCount
*/
bool
Sequence::modifyMonomer(std::size_t index,
                        const QString modif_name,
                        bool override)
{
  if(index >= size())
    qFatalStream() << "Programming error. Index is out of bounds.";

  MonomerSPtr monomer_sp = getMonomerSPtrAt(index);

  ErrorList error_list;

  QString uuid = monomer_sp->modify(modif_name, override, &error_list);

  if(uuid.isEmpty() || error_list.size())
    {
      qCritical() << "Monomer modification with Modif:" << modif_name
                  << "failed";
      return false;
    }

  return true;
}

/*!
\brief Returns true if this Sequence instance has at least one modified Monomer
instance between Monomer indices \a left_index and \a right_index (both
included).

If no Monomer is modified in that range, returns false.

It is fatal if any of the indices is out of bounds or if \a left_index is
greater than \a right_index.
*/
bool
Sequence::hasModifiedMonomer(std::size_t left_index,
                             std::size_t right_index) const
{
  const std::size_t monomer_count = size();

  if(!(left_index < monomer_count && right_index < monomer_count))
    qFatal("Programming error. Indices out of bounds.");

  if(right_index < left_index)
    qFatal("Programming error. Indices are not correct.");

  // Walk the range and stop at the first modified Monomer.
  bool found_modified = false;
  std::size_t iter    = left_index;

  while(!found_modified && iter <= right_index)
    {
      found_modified = m_monomers.at(iter)->isModified();
      ++iter;
    }

  return found_modified;
}

/*!
\brief Returns a container holding, in increasing order, the indices of all the
modified Monomer instances in the member Monomer container.

The search is performed only between Monomer indices \a left_index and \a
right_index (both included).

If no Monomer is modified in that range, returns an empty container.

It is fatal if any of the indices is out of bounds or if \a left_index is
greater than \a right_index.
*/
std::vector<std::size_t>
Sequence::modifiedMonomerIndices(std::size_t left_index,
                                 std::size_t right_index) const
{
  const std::size_t monomer_count = size();

  if(!(left_index < monomer_count && right_index < monomer_count))
    qFatal("Programming error. Indices out of bounds.");

  if(right_index < left_index)
    qFatal("Programming error. Indices are not correct.");

  std::vector<std::size_t> modified_indices;

  for(std::size_t iter = left_index; iter <= right_index; ++iter)
    {
      // Unmodified Monomer instances are of no interest here.
      if(!m_monomers.at(iter)->isModified())
        continue;

      modified_indices.push_back(iter);
    }

  return modified_indices;
}

//////////////// SEQUENCE SEARCH FUNCTIONS /////////////////////

/*!
\brief Searches for the \a sequence_motif Sequence in this Sequence's
container of Monomer instances, starting at \a index.

This Sequence's container of Monomer instances is searched, from \a index
onwards, for a stretch of Monomer instances whose codes match, in order, the
codes of the Monomer instances in \a sequence_motif. Only the Monomer codes are
compared, not the modifications of the Monomer instances.

As soon as a matching Monomer stretch is found, \a index is set to the index
of the first Monomer of that stretch in this Sequence's container of Monomer
instances. If no match is found, \a index is left unchanged.

Returns -1 if an error occurred (this Sequence or \a sequence_motif is not
valid, or \a index is out of bounds), 1 if \a sequence_motif was found in this
Sequence, 0 otherwise (including when this Sequence or \a sequence_motif is
empty).
*/

int
Sequence::findForwardMotif(const Sequence &sequence_motif,
                           std::size_t &index) const
{
  // Searches this Sequence, from 'index' onwards, for the first stretch of
  // Monomer instances whose codes match those of 'sequence_motif'.
  //
  // Returns -1 on error (invalid Sequence, invalid motif, 'index' out of
  // bounds), 1 when the motif was found ('index' is then set to the index of
  // the first matching Monomer) and 0 when it was not found ('index' is then
  // left untouched).

  if(!m_isValid)
    {
      qCritical() << "The Sequence is not valid.";
      return -1;
    }

  const std::size_t sequence_size = size();

  if(!sequence_size)
    {
      qCritical() << "The Sequence is empty.";
      return 0;
    }

  if(!sequence_motif.isValid())
    {
      qCritical() << "The sequence motif to search for is not valid.";
      return -1;
    }

  if(index >= sequence_size)
    return -1;

  // All the sizes and indices are handled as std::size_t, so that no
  // narrowing to int nor signed/unsigned comparison can occur.
  const std::size_t motif_size = sequence_motif.size();

  // If the motif is empty, there is nothing to search for.
  if(!motif_size)
    return 0;

  // If the motif cannot fit between 'index' and the end of this Sequence,
  // return right away. Written as a subtraction (safe because index <
  // sequence_size) so that the test cannot overflow.
  if(motif_size > sequence_size - index)
    return 0;

  // Last index in this Sequence at which a match may still start. Beyond
  // that index, there are not enough Monomer instances left to accommodate
  // the whole motif, so there is no point in looking any further.
  const std::size_t last_start_index = sequence_size - motif_size;

  for(std::size_t iter = index; iter <= last_start_index; ++iter)
    {
      bool matched = true;

      for(std::size_t kter = 0; kter < motif_size; ++kter)
        {
          const MonomerSPtr monomer_sp = getMonomerCstSPtrAt(iter + kter);
          const MonomerSPtr motif_monomer_sp =
            sequence_motif.getMonomerCstSPtrAt(kter);

          // We compare codes and not monomers because that comparison
          // would involve the comparison of modifications inside the
          // monomers, which would not work here.
          if(monomer_sp->getCode() != motif_monomer_sp->getCode())
            {
              matched = false;
              break;
            }
        }

      if(matched)
        {
          // The match indeed occurred: let the caller know where.
          index = iter;
          return 1;
        }
    }

  // No match could be achieved.
  return 0;
}

//////////////// DIAGNOSTICS FUNCTIONS /////////////////////

/*!
\brief Returns the size of this Sequence as the size of the container of Monomer
instances.
*/
std::size_t
Sequence::size() const
{
  // The size of the Sequence is the number of Monomer instances it holds.
  const std::size_t monomer_count = m_monomers.size();

  return monomer_count;
}

/*!
\brief Returns true if \a index is valid as an index of a Monomer instance in
this Sequence's container of \l{Monomer} instances, false otherwise.
*/
bool
Sequence::isInBound(std::size_t index)
{
  // Valid indices are in the [0, size() - 1] range; with an empty Sequence
  // no index is valid.
  return index < size();
}

//////////////// OPERATORS /////////////////////

/*!
\brief Assigns \a other to this Sequence.

The copying is deep with the instances in the \a other container of Monomer
instances being reinstantiated anew into this Sequence's container of Monomer
instances.

Returns a reference to this Sequence.
*/
Sequence &
Sequence::operator=(const Sequence &other)
{
  // Self-assignment: nothing to do.
  if(&other == this)
    return *this;

  mcsp_polChemDef = other.mcsp_polChemDef;

  // Both containers must be emptied together. Leaving the former
  // Uuid-Monomer pairs in place would break the one-to-one correspondence
  // between m_monomers and m_uuidMonomerPairs that storeMonomer() and
  // getAllMonomerUuids() check for.
  m_monomers.clear();
  m_uuidMonomerPairs.clear();

  // NOTE(review): storeMonomer() stores the very shared pointers held by
  // 'other', so the Monomer instances end up shared between both Sequence
  // instances. Confirm whether a deep copy, as stated in the documentation
  // of this operator, is what is intended.
  for(const MonomerSPtr &monomer_sp : other.m_monomers)
    storeMonomer(monomer_sp);

  // This Sequence now has the same polymer chemistry definition and the same
  // Monomer instances as 'other': its validity status is that of 'other',
  // not the one it had before the assignment.
  m_isValid = other.m_isValid;

  return *this;
}

/*!
\brief Returns true if the \c this Sequence is identical to \a other, false
otherwise.

The comparison of the Monomer instances is deep and is not based on merely
comparing the pointers.
*/
bool
Sequence::operator==(const Sequence &other)
{
  // An object is always identical to itself.
  if(this == &other)
    return true;

  // The polymer chemistry definitions are compared by pointer.
  if(mcsp_polChemDef != other.mcsp_polChemDef)
    {
      qInfo() << "Differing PolChemDef.";
      return false;
    }

  if(m_monomers.size() != other.m_monomers.size())
    {
      qInfo() << "Differing Monomer container size.";
      return false;
    }

  // Same size on both sides: compare the Monomer instances pairwise, by
  // value (the pointers are dereferenced), stopping at the first difference.
  return std::equal(m_monomers.cbegin(),
                    m_monomers.cend(),
                    other.m_monomers.cbegin(),
                    [](const MonomerSPtr &lhs_sp, const MonomerSPtr &rhs_sp) {
                      return !(*lhs_sp != *rhs_sp);
                    });
}

/*!
\brief Returns true if the \c this Sequence is different than \a other, false
otherwise.

Returns the negation of operator==(other).
*/
bool
Sequence::operator!=(const Sequence &other)
{
  // An object never differs from itself; in all the other cases, the result
  // is the negation of operator==().
  return (&other == this) ? false : !operator==(other);
}

//////////////// VALIDATIONS /////////////////////
/*!
\brief Validates this Sequence using the member polymer chemistry definition as
the reference polymer chemistry definition and sets m_isValid to the result of
this validation.

Returns true if all the Monomer instances in the member container could be found
in the polymer chemistry definition's container of reference Monomer instances.

Any error is documented by storing a message to \a error_list_p.

\sa makeMonomers()
*/
bool
Sequence::validate(ErrorList *error_list_p) const
{
  // Callers that are not interested in the error messages may pass nullptr:
  // the messages are then collected in a throw-away list instead of
  // dereferencing a null pointer.
  ErrorList local_error_list;
  ErrorList *effective_error_list_p =
    (error_list_p != nullptr ? error_list_p : &local_error_list);

  // Becomes true as soon as this validation adds one message to the list.
  bool error_found = false;

  if(mcsp_polChemDef == nullptr || mcsp_polChemDef.get() == nullptr ||
     !mcsp_polChemDef->isValid())
    {
      qCritical() << "The PolChemDef is not available.";
      effective_error_list_p->push_back(
        "The PolChemDef is not available, cannot validate the Sequence "
        "instance.");
      error_found = true;
    }

  // One message is stored for each Monomer that is not known, by its code,
  // in the polymer chemistry definition.
  for(const MonomerSPtr &monomer_sp : m_monomers)
    {
      if(monomer_sp->isKnownByCodeInPolChemDef() !=
         Enums::PolChemDefEntityStatus::ENTITY_KNOWN)
        {
          qCritical()
            << "At least one Monomer was not found in the PolChemDef.";
          effective_error_list_p->push_back(
            "At least one Monomer was not found in the PolChemDef");
          error_found = true;
        }
    }

  // The Sequence is valid only if this validation added no message.
  m_isValid = !error_found;
  return m_isValid;
}

/*!
\brief Returns the validity status of this Sequence instance.
*/
bool
Sequence::isValid() const
{
  // Plain accessor: reports the status stored in m_isValid, no validation is
  // performed here.
  const bool validity_status = m_isValid;

  return validity_status;
}

//////////////// UTILS /////////////////////

/*!
\brief Returns a copy of the \a monomer_text sequence of Monomer codes from
which all spaces, non-breaking spaces, tabulations, form feeds, carriage
returns, linefeeds and soft hyphens have been removed.
*/
QString
Sequence::unspacifySequence(const QString &monomer_text)
{
  // Nothing to filter: hand back the text as is.
  if(monomer_text.isEmpty())
    return monomer_text;

  // Build the result in a single forward pass instead of removing the
  // characters one by one from a copy: no index variable that could narrow
  // the string length, and no repeated shifting of the remaining characters.
  QString unspaced_text;
  unspaced_text.reserve(monomer_text.size());

  for(const QChar &cur_char : monomer_text)
    {
      // Characters that are dropped: tabulation, linefeed, form feed,
      // carriage return, space, non-breaking space and soft hyphen.
      const bool is_spacing =
        cur_char == QChar::Tabulation || cur_char == QChar::LineFeed ||
        cur_char == QChar::FormFeed || cur_char == QChar::CarriageReturn ||
        cur_char == QChar::Space || cur_char == QChar::Nbsp ||
        cur_char == QChar::SoftHyphen;

      if(!is_spacing)
        unspaced_text.append(cur_char);
    }

  return unspaced_text;
}

/*!
\brief Stores the Monomer instance pointer \a monomer_sp in the member container
and returns the Uuid string associated to it.
*/
QString
Sequence::storeMonomer(const MonomerSPtr &monomer_sp)
{
  if(monomer_sp == nullptr)
    qFatalStream() << "The provided MonomerSPtr is nullptr.";

  // A given Monomer pointer may be stored only once, be it in the Monomer
  // container or in the Uuid-Monomer pairs container.
  const bool already_stored =
    hasMonomer(monomer_sp) || !getUuidForMonomer(monomer_sp).isEmpty();

  if(already_stored)
    qFatalStream()
      << "It is prohibited to store the same MonomerCstSPtr more than once.";

  // The Uuid string that identifies the newly stored Monomer.
  QString new_uuid = QUuid::createUuid().toString();

  // Append to both containers: the shared pointer is copied in the first one
  // (shared ownership) and only weakly referenced in the second one.
  m_monomers.push_back(monomer_sp);
  m_uuidMonomerPairs.push_back(UuidMonomerWPtrPair(new_uuid, monomer_sp));

  return new_uuid;
}

/*!
\brief Stores the Monomer instance pointer \a monomer_sp in the member
container, at position \a index, and returns the Uuid string associated to it.
*/
QString
Sequence::storeMonomer(const MonomerSPtr &monomer_sp, std::size_t index)
{
  if(monomer_sp == nullptr)
    qFatalStream() << "The provided MonomerSPtr is nullptr.";

  // Inserting at 'index' requires index <= size (index == size appends).
  // Going beyond that would build an iterator past the end of the
  // containers, which is undefined behavior.
  if(index > m_monomers.size())
    qFatalStream() << "Programming error. Index is out of bounds.";

  if(index > m_uuidMonomerPairs.size())
    qFatalStream()
      << "Inconsistency between m_monomers and m_uuidMonomerPairs.";

  // Do not store an item twice.
  if(hasMonomer(monomer_sp) || !getUuidForMonomer(monomer_sp).isEmpty())
    qFatalStream()
      << "It is prohibited to store the same MonomerCstSPtr more than once.";

  // Even if we get a ref to shared_ptr, the reference count increment will
  // occur.
  m_monomers.emplace(m_monomers.begin() + index, monomer_sp);

  // The Uuid-Monomer pair is inserted at the same position as the Monomer.
  QString uuid = QUuid::createUuid().toString();
  m_uuidMonomerPairs.emplace(m_uuidMonomerPairs.begin() + index,
                             UuidMonomerWPtrPair(uuid, monomer_sp));

  return uuid;
}

/*!
\brief Returns true if \a monomer_sp was found in the member container of
Monomer instances, false otherwise.
*/
bool
Sequence::hasMonomer(const MonomerSPtr &monomer_sp) const
{
  if(monomer_sp == nullptr)
    qFatalStream() << "Pointer cannot be nullptr.";

  std::vector<MonomerSPtr>::const_iterator the_iterator_cst =
    std::find_if(m_monomers.cbegin(),
                 m_monomers.cend(),
                 [monomer_sp](const MonomerSPtr &the_monomer_csp) {
                   return the_monomer_csp == monomer_sp;
                 });

  if(the_iterator_cst == m_monomers.cend())
    return false;

  // No sanity checks with getMonomerFromUuid() or getUuidForMonomer()
  // because that makes circular calls (these functions make sanity
  // checks by calling this hasMonomer().)

  return true;
}

/*!
\brief Returns true if \a monomer_sp was found in the member container of
Uuid-Monomer pairs, false otherwise.
*/
bool
Sequence::hasUuid(const MonomerSPtr &monomer_sp) const
{
  if(monomer_sp == nullptr)
    qFatalStream() << "Pointer cannot be nullptr.";

  std::vector<UuidMonomerWPtrPair>::const_iterator the_iterator_cst =
    std::find_if(m_uuidMonomerPairs.cbegin(),
                 m_uuidMonomerPairs.cend(),
                 [monomer_sp](const UuidMonomerWPtrPair &the_pair) {
                   return the_pair.second.lock() == monomer_sp;
                 });

  if(the_iterator_cst == m_uuidMonomerPairs.cend())
    return false;

  // Sanity check
  if(!hasMonomer(monomer_sp))
    qFatalStream()
      << "Inconsistency between m_monomers and m_uuidMonomerPairs.";

  return true;
}

/*!
\brief Returns the Monomer instance pointer in the member container that is
associated to the \a uuid Uuid string.

If no such Monomer instance pointer is found,  nullptr is returned.
*/
MonomerSPtr
Sequence::getMonomerForUuid(const QString &uuid) const
{
  // Only the first pair that carries the requested Uuid string is
  // considered.
  for(const UuidMonomerWPtrPair &uuid_monomer_pair : m_uuidMonomerPairs)
    {
      if(!(uuid_monomer_pair.first == uuid))
        continue;

      MonomerSPtr monomer_csp = uuid_monomer_pair.second.lock();

      // Sanity check: the Monomer designated by the pair must be in
      // m_monomers.
      if(!hasMonomer(monomer_csp))
        qFatalStream()
          << "Inconsistency between m_monomers and m_uuidMonomerPairs.";

      return monomer_csp;
    }

  // No pair with that Uuid string.
  return nullptr;
}

/*!
\brief Returns the UUID string identifying \a monomer_sp in the member
container.

If no such Monomer is found, an empty string is returned.
*/
QString
Sequence::getUuidForMonomer(const MonomerSPtr &monomer_sp) const
{
  if(monomer_sp == nullptr)
    qFatalStream() << "Programming error. Pointer cannot be nullptr.";

  // Find the first Uuid-Monomer pair designating the very same Monomer
  // instance. Only the pointers are compared: the object managed by the
  // pair's weak pointer is never accessed, because it might be gone.
  const UuidMonomerWPtrPair *matching_pair_p = nullptr;

  for(const UuidMonomerWPtrPair &uuid_monomer_pair : m_uuidMonomerPairs)
    {
      if(uuid_monomer_pair.second.lock() == monomer_sp)
        {
          matching_pair_p = &uuid_monomer_pair;
          break;
        }
    }

  const bool is_paired = (matching_pair_p != nullptr);

  // Sanity check: a Monomer is either in both containers or in none.
  if(hasMonomer(monomer_sp) != is_paired)
    qFatalStream() << "Inconsistency between the m_monomers and the "
                      "m_uuidMonomerPairs vectors.";

  return is_paired ? matching_pair_p->first : QString();
}

/*!
\brief Returns a container of QString instances that correspond to the UUID
strings that identify all the Monomer instances involved in this Sequence.

If no Monomer is found, an empty container is returned.
*/
std::vector<QString>
Sequence::getAllMonomerUuids() const
{
  std::vector<QString> the_uuid_strings;

  for(const UuidMonomerWPtrPair &pair : m_uuidMonomerPairs)
    the_uuid_strings.push_back(pair.first);

  // Sanity check
  if(the_uuid_strings.size() != m_monomers.size())
    qFatalStream()
      << "Inconsistency between the <object>_s and <uuid-object> pairs.";

  return the_uuid_strings;
}

/*!
\brief Removes from the member container of Uuid-Monomer pairs all the pairs
whose Monomer instance is not alive anymore or is not found in the member
container of Monomer instances.
*/
void
Sequence::cleanupMonomers()
{
  // qDebug() << "At beginning, count of UUID-Monomer pairs:"
  //          << m_uuidMonomerPairs.size();

  std::vector<UuidMonomerWPtrPair>::iterator the_iterator =
    m_uuidMonomerPairs.begin();
  std::vector<UuidMonomerWPtrPair>::iterator the_end_iterator =
    m_uuidMonomerPairs.end();

  while(the_iterator != the_end_iterator)
    {
      if((*the_iterator).second.expired() ||
         (*the_iterator).second.lock() == nullptr ||
         !hasMonomer((*the_iterator).second.lock()))
        the_iterator = m_uuidMonomerPairs.erase(the_iterator);
      else
        ++the_iterator;
    }

  // qDebug() << "At end, count of UUID-Monomer pairs:"
  //          << m_uuidMonomerPairs.size();
}

/*!
\brief Returns a checksum calculated on this Sequence's portion contained in
[\a index_start -- \a index_stop].

The text of the sequence matching the [\a index_start -- \a index_stop] range
is obtained by calling getSequence(), with (\a with_modifs is true) or without
(\a with_modifs is false) the monomer modifications. If \a index_start is
greater than \a index_stop, the two indices are swapped. The checksum is
computed on the UTF-8 encoding of that text.

Returns the checksum, or 0 if this Sequence is empty.
*/
quint16
Sequence::checksum(int index_start, int index_stop, bool with_modifs) const
{
  // An empty Sequence has, by convention, a checksum of 0.
  if(size() == 0)
    return 0;

  // Order the indices so that the range is always described from its lowest
  // to its highest index.
  const bool is_reversed = index_start > index_stop;
  int first_index        = is_reversed ? index_stop : index_start;
  int last_index         = is_reversed ? index_start : index_stop;

  // The checksum is computed on the UTF-8 encoding of the sequence text.
  const QByteArray utf8_bytes =
    getSequence(first_index, last_index, with_modifs).toUtf8();

  return qChecksum(QByteArrayView(utf8_bytes));
}

/*!
\brief Reset this Sequence instance to default values.
*/
void
Sequence::clear()
{
  // A Sequence without any Monomer is not considered valid.
  m_isValid = false;

  // Empty both containers so that they remain consistent with each other.
  m_uuidMonomerPairs.clear();
  m_monomers.clear();
}


} // namespace libXpertMassCore
} // namespace MsXpS
