/*****************************************************************************
* Program Summary:
*     Count how often each unique word occurs in each given text file and
*     display the n most frequent words per file
*
*  Credits:
*     Peter Hull (for finding the "partial_sort_copy" and code snippet used
*        in the "isGreater" function)
*****************************************************************************/
#include <algorithm>
#include <climits>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <vector>

using namespace std;


// Word -> number of occurrences; keys are ordered by less<string>
typedef map<string, int, less<string> > SMAP;
// A single (word, count) entry
typedef pair<string, int> SPAIR;
// A sequence of (word, count) entries
typedef vector<SPAIR> SVEC;


/// PROTOTYPES
// Declared up front because toLower() calls isValid(), which is only defined
// further down in the file.

// Returns a string built from the argument (see definition below)
string toLower(const string &);
// Tells whether a single character may be part of a word
bool isValid(const char);
// Comparison on the 'second' member (the count) of two pairs
bool isGreater(const SPAIR &, const SPAIR &);
/// END PROTOTYPES


/*****************************************************************************
*
*  Function:   toLower()
*
*  Parameters: theString   A string to convert to lowercase
*
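*  Return:     Copy of 'theString' in lowercase, with every character that
*              isValid() rejects removed
*
*  Purpose:    Custom "tolower" that works on a whole string and drops
*              characters that are not valid in a word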
*****************************************************************************/
string toLower(const string &theString)
{
   string result;

   for (string::size_type pos = 0; pos != theString.size(); ++pos)
   {
      char letter = theString[pos];

      // Anything isValid() rejects is left out of the result entirely
      if (!isValid(letter))
         continue;

      // Uppercase and lowercase letters are a fixed distance apart (ASCII)
      if ('A' <= letter && letter <= 'Z')
         letter += 'a' - 'A';

      // At this point the character is a-z, 0-9, or a hyphen
      result += letter;
   }

   return result;
}


/*****************************************************************************
*
*  Function:   isValid()
*
*  Parameters: theChar   Given character to check
*
*  Return:     TRUE if the character provided was:
*                 [a-zA-Z0-9] or a hyphen
*              FALSE otherwise
*
*  Purpose:    Custom "isalnum" that allows numbers, letters, or a hyphen as
*              valid characters
*****************************************************************************/
bool isValid(const char theChar)
{
   // The hyphen is the only accepted character that is not alphanumeric
   if (theChar == '-')
      return true;

   // Character literals stand for the ASCII ranges 48-57, 65-90 and 97-122
   const bool isDigit = ('0' <= theChar && theChar <= '9');
   const bool isUpper = ('A' <= theChar && theChar <= 'Z');
   const bool isLower = ('a' <= theChar && theChar <= 'z');

   return isDigit || isUpper || isLower;
}


/*****************************************************************************
*
*  Function:   isGreater()
*
*  Parameters: one, two    Both type std::pair (word, count); only the counts
*                          are compared
*
*  Return:     TRUE if the count of 'one' is greater than the count of 'two'
*              FALSE otherwise
*
*  Purpose:    Custom "greater-than" on the count of a std::pair, so that
*              sorting with it puts the highest counts first.
*****************************************************************************/
bool isGreater(const SPAIR &one, const SPAIR &two)
{
   // Only the counts take part in the comparison; the words are ignored
   const int countOne = one.second;
   const int countTwo = two.second;

   return countTwo < countOne;
}


/*****************************************************************************
*
*  Function:   showUsage()
*
*  Parameters: Name of current program
*
*  Return:     0 (program didn't cause error, but no filenames given)
*
*  Purpose:    Output valid command-line arguments and exit program.
*****************************************************************************/
int showUsage(char *programName)
{
   // Synopsis line first, then one explanatory line per argument
   cout << "Usage:\n  " << programName;
   cout << " [-num] file1 [file2 file3...]\n";
   cout << "\tnum:\tnumber of results to display (default: 100)\n";
   cout << "\tfile#:\tname of files to parse\n";

   // The value is meant to be returned from main() as the exit status
   return 0;
}


/*****************************************************************************
*
*  Function:   main()
*
*  Parameters: argc, argv   Command-line arguments: an optional "-num" result
*                          count and the names of the files to load
*
*  Return:     0
*
*  Purpose:    Count unique words in each text file and display the top
*              results per file
*****************************************************************************/
int main(int argc, char **argv)
{
   const int defaultNumResults = 100;     // Used when "-num" holds no number

   int numResults = defaultNumResults;    // User-specified number of results
   vector<string> filename;               // Files that could be opened

   // Pass 1: read the options and keep only the files that can be opened
   for (int i = 1; i < argc; i++)
   {
      if (argv[i][0] == '-')
      {
         // strtol (unlike atoi) tells us whether any digits were read, and
         // it clamps instead of invoking undefined behaviour on overflow
         const char *digits = argv[i] + 1;
         char *parseEnd = 0;
         const long requested = strtol(digits, &parseEnd, 10);

         if (parseEnd == digits || requested < 0)
            numResults = defaultNumResults;  // No number, or a negative one
         else if (requested > INT_MAX)
            numResults = INT_MAX;
         else
            numResults = static_cast<int>(requested);
      }
      else
      {
         // If the file cannot be opened, silently skip it
         ifstream probe(argv[i]);
         if (probe)
            filename.push_back(argv[i]);
      }
   }

   // No filenames were given, or none of them could be opened: show usage
   if (filename.empty())
      return showUsage(argv[0]);

   // Pass 2: parse each file separately
   for (vector<string>::size_type curFile = 0; curFile < filename.size();
        curFile++)
   {
      const string &currentFilename = filename[curFile];

      // A fresh stream per file: no state to clear, closed automatically
      ifstream fin(currentFilename.c_str());
      if (!fin)
      {
         // The file could be opened in pass 1 but not any more
         cout << "Could not open \"" << currentFilename << "\".\n";
         continue;
      }

      SMAP mapString;               // Word -> occurrences, for this file only
      unsigned int numWords = 0;    // Total number of words counted
      string input;                 // Whitespace-delimited token from file

      // Test the extraction itself rather than eof(): reading the last token
      // of a file that does not end in whitespace sets eofbit, and a loop on
      // !eof() would then drop that token without counting it.
      while (fin >> input)
      {
         // The word is the leading run of valid characters of the token;
         // everything from the first invalid character on is discarded, so
         // a token that starts with an invalid character yields no word.
         string::size_type wordLength = 0;
         while (wordLength < input.size() && isValid(input[wordLength]))
            wordLength++;

         if (wordLength > 0)
         {
            mapString[toLower(input.substr(0, wordLength))]++;
            numWords++;
         }
      }

      // If number of words < results requested by user, show lesser number.
      // numResults is never negative here, so the conversion is safe.
      const SMAP::size_type wanted = static_cast<SMAP::size_type>(numResults);
      const SMAP::size_type shown = min(wanted, mapString.size());

      cout << "\nNumber of words processed in " << currentFilename << ": "
           << numWords << '\n';
      cout << "Top " << shown << " results and their frequencies:\n";

      // Copy the 'shown' most frequent entries, highest count first
      SVEC mapVector(shown);
      partial_sort_copy(mapString.begin(), mapString.end(), mapVector.begin(),
                        mapVector.end(), isGreater);

      unsigned int rank = 1;
      for (SVEC::const_iterator entry = mapVector.begin();
           entry != mapVector.end(); ++entry, ++rank)
      {
         cout << right << setw(4) << rank << ": "
              << entry->first << ": " << entry->second << '\n';
      }
   }

   return 0;
}
