//===========================================================================
// @(#) $DwmPath$
// @(#) $Id$
//===========================================================================
//  Copyright (c) Daniel W. McRobb 2017, 2024
//  All rights reserved.
//
//  Redistribution and use in source and binary forms, with or without
//  modification, are permitted provided that the following conditions
//  are met:
//
//  1. Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//  2. Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//  3. The names of the authors and copyright holders may not be used to
//     endorse or promote products derived from this software without
//     specific prior written permission.
//
//  IN NO EVENT SHALL DANIEL W. MCROBB BE LIABLE TO ANY PARTY FOR
//  DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
//  INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE,
//  EVEN IF DANIEL W. MCROBB HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
//  DAMAGE.
//
//  THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND
//  DANIEL W. MCROBB HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
//  UPDATES, ENHANCEMENTS, OR MODIFICATIONS. DANIEL W. MCROBB MAKES NO
//  REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER
//  IMPLIED OR EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
//  WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE,
//  OR THAT THE USE OF THIS SOFTWARE WILL NOT INFRINGE ANY PATENT,
//  TRADEMARK OR OTHER RIGHTS.
//===========================================================================

//---------------------------------------------------------------------------
//!  \file rvcoalesce.cc
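//!  \brief Coalesces a gzip-compressed CAIDA routeviews prefix-to-AS file
//!    and converts the coalesced result into a binary Ipv4Routes file.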
//---------------------------------------------------------------------------

extern "C" {
  #include <time.h>
  #include <unistd.h>
  #include <zlib.h>
}

#include <fstream>
#include <iostream>
#include <iomanip>
#include <regex>
#include <sstream>
#include <tuple>
#include <vector>

#include <boost/iostreams/filtering_streambuf.hpp>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/filter/gzip.hpp>

#include "DwmIpv4Routes.hh"

using namespace std;
using boost::iostreams::filtering_streambuf;
using boost::iostreams::gzip_decompressor;
using boost::iostreams::gzip_compressor;

//----------------------------------------------------------------------------
//!  Reads the gzip-compressed text file \c inFileName, whose records are
//!  three whitespace-separated fields (network address, mask length, AS
//!  string), loads them into an Ipv4Routes<string>, calls Coalesce() on it
//!  and writes the entries (in the order produced by SortByKey()) as
//!  gzip-compressed, tab-separated text lines.
//!
//!  \c outFileName is always set (even on failure) to the name of the
//!  output file: \c inFileName with a trailing ".gz" removed (if present)
//!  and "_coalesced.gz" appended.  The suffix is always ".gz" because the
//!  output is always gzip-compressed.
//!
//!  Returns true on success.  Returns false if either file could not be
//!  opened, if the decompressed input stream went bad, or if writing or
//!  closing the output failed.
//----------------------------------------------------------------------------
bool CoalesceDataFile(const string & inFileName, string & outFileName)
{
  bool  rc = false;

  //  Strip a genuine ".gz" suffix.  Note string::find_last_of(".gz") would
  //  search for any of the characters '.', 'g' or 'z', not the suffix.
  static const string  gzSuffix(".gz");
  outFileName = inFileName;
  if ((outFileName.size() >= gzSuffix.size())
      && (outFileName.compare(outFileName.size() - gzSuffix.size(),
                              gzSuffix.size(), gzSuffix) == 0)) {
    outFileName.erase(outFileName.size() - gzSuffix.size());
  }
  outFileName += "_coalesced.gz";

  //  Both files hold gzip data, so open them in binary mode.
  ifstream  is(inFileName.c_str(), std::ios_base::binary);
  if (is) {
    ofstream  os(outFileName.c_str(), std::ios_base::binary);
    if (os) {
      //  Decompressing input chain.
      filtering_streambuf<boost::iostreams::input>  gzin;
      gzin.push(gzip_decompressor());
      gzin.push(is);
      istream  gzis(&gzin);
      //  Compressing output chain.
      filtering_streambuf<boost::iostreams::output>  gzout;
      gzout.push(gzip_compressor(boost::iostreams::zlib::best_compression));
      gzout.push(os);
      std::ostream  gzos(&gzout);

      //  Load every record, keyed by prefix, with the AS field kept as text.
      Dwm::Ipv4Routes<string>  routes;
      string  addrstr, maskstr, asnumstr;
      while (gzis >> addrstr >> maskstr >> asnumstr) {
        Dwm::Ipv4Address  addr(addrstr);
        uint8_t  maskLen = stoul(maskstr) & 0xFF;
        Dwm::Ipv4Prefix  pfx(addr, maskLen);
        routes[pfx] = asnumstr;
      }
      //  Reaching end of input leaves eofbit/failbit set; badbit indicates
      //  the underlying stream buffer failed.
      const bool  readOk = (! gzis.bad());

      routes.Coalesce();

      vector<pair<Dwm::Ipv4Prefix,string>>  rtvec;
      routes.SortByKey(rtvec);
      for (auto & rve : rtvec) {
        //  Widen the mask length so it is printed as a number, not a char.
        gzos << rve.first.Network() << '\t'
             << static_cast<uint16_t>(rve.first.MaskLength()) << '\t'
             << rve.second << '\n';
      }
      const bool  writeOk = gzos.good();

      boost::iostreams::close(gzout);
      os.close();
      rc = (readOk && writeOk && (! os.fail()));
    }
    is.close();
  }
  return rc;
}

//----------------------------------------------------------------------------
//!  Reads the gzip-compressed text file \c coalescedInFile, whose records
//!  are three whitespace-separated fields (network address, mask length,
//!  AS number), into an Ipv4Routes<uint32_t> and saves it to \c outFile
//!  using Ipv4Routes::Write().
//!
//!  Returns true on success.  Returns false if either file could not be
//!  opened, if the decompressed input stream went bad, or if the output
//!  stream reports a failure after writing and closing.
//----------------------------------------------------------------------------
bool CreatePrefix2ASFile(const string & coalescedInFile,
                         const string & outFile)
{
  bool      rc = false;
  //  The input is gzip data and the output is written by
  //  Ipv4Routes::Write(), so open both in binary mode.
  ifstream  is(coalescedInFile, std::ios_base::binary);
  if (is) {
    ofstream  os(outFile, std::ios_base::binary);
    if (os) {
      //  input file must be a gzip file.  Set up gzip input filter.
      filtering_streambuf<boost::iostreams::input>  gzin;
      gzin.push(gzip_decompressor());
      gzin.push(is);
      istream  gzis(&gzin);

      //  Read all the data into an Ipv4Routes<uint32_t> object.
      Dwm::Ipv4Routes<uint32_t>  routes;
      string  addrstr, maskstr, asnumstr;
      while (gzis >> addrstr >> maskstr >> asnumstr) {
        Dwm::Ipv4Address  addr(addrstr);
        uint8_t  maskLen = stoul(maskstr) & 0xFF;
        Dwm::Ipv4Prefix  pfx(addr, maskLen);
        //  stoul() stops at the first character that is not part of the
        //  number, so only the leading AS number of the field is kept.
        routes[pfx] = stoul(asnumstr);
      }
      //  Reaching end of input leaves eofbit/failbit set; badbit indicates
      //  the underlying stream buffer failed.
      const bool  readOk = (! gzis.bad());

      //  Re-coalescing is currently disabled.  Original note: re-coalesce
      //  since AS sets are reduced to a single AS and it may affect
      //  coalescing.
      // routes.Coalesce();

      //  Save the data in binary form and close the output file.
      routes.Write(os);
      os.close();
      rc = (readOk && (! os.fail()));
    }
    is.close();
  }
  return rc;
}

//----------------------------------------------------------------------------
//!  Calls CreatePrefix2ASFile() for each entry of \c files.  The output
//!  file name is the input file name with each occurrence of
//!  "_coalesced.gz" replaced by ".bin".  If the input name contains no
//!  such occurrence, ".bin" is appended instead so that the output never
//!  has the same path as the input.
//!
//!  Every file is attempted.  Returns true only if all conversions
//!  succeeded (and hence true for an empty \c files).
//----------------------------------------------------------------------------
bool CreatePrefix2ASFiles(vector<string> & files)
{
  bool  rc = true;
  regex  rgx("_coalesced\\.gz", regex::ECMAScript|regex::optimize);
  
  for (const auto & file : files) {
    string  outFileName = regex_replace(file, rgx, ".bin");
    if (outFileName == file) {
      //  Nothing was replaced.  Opening the output on the same path would
      //  truncate the input before it is read, so use a distinct name.
      outFileName += ".bin";
    }
    if (! CreatePrefix2ASFile(file, outFileName)) {
      rc = false;
    }
  }
  return rc;
}

//----------------------------------------------------------------------------
//!  Calls CoalesceDataFile() for each entry of \c fileNames.  On success
//!  the entry is replaced in place with the name of the coalesced file;
//!  on failure the entry is left unchanged.  Every entry is attempted.
//!  Returns true only if every call succeeded.
//----------------------------------------------------------------------------
bool CoalesceDataFiles(vector<string> & fileNames)
{
  bool  allSucceeded = true;
  for (string & name : fileNames) {
    string  coalescedName;
    if (! CoalesceDataFile(name, coalescedName)) {
      allSucceeded = false;
      continue;
    }
    name = coalescedName;
  }
  return allSucceeded;
}

//----------------------------------------------------------------------------
//!  Prints a one-line usage message to cerr, using \c argv0 as the
//!  program name.
//----------------------------------------------------------------------------
void Usage(const string & argv0)
{
  ostream  & err = cerr;
  err << "usage: ";
  err << argv0;
  err << " routeviews_file\n";
}

//----------------------------------------------------------------------------
//!  Runs the two processing steps on the single file \c inFile:
//!  CoalesceDataFiles() followed by CreatePrefix2ASFiles().  The second
//!  step is only run if the first succeeded.  Returns true if both steps
//!  succeeded.
//----------------------------------------------------------------------------
bool CoalesceGivenFile(const string & inFile)
{
  vector<string>  fileNames{inFile};
  //  && short-circuits, so CreatePrefix2ASFiles() is skipped on failure.
  return (CoalesceDataFiles(fileNames) && CreatePrefix2ASFiles(fileNames));
}

//----------------------------------------------------------------------------
//!  Program entry point.  Coalesces the CAIDA prefix to AS file named by
//!  the first command line argument, for example:
//!  https://data.caida.org/datasets/routing/routeviews-prefix2as/2024/03/routeviews-rv2-20240317-1200.pfx2as.gz
//!
//!  Exits with 0 on success.  Exits with 1 if processing failed or if no
//!  file argument was given (in which case the usage message is printed).
//----------------------------------------------------------------------------
int main(int argc, char *argv[])
{
  if (argc < 2) {
    Usage(argv[0]);
    return 1;
  }
  return (CoalesceGivenFile(argv[1]) ? 0 : 1);
}
