/* 
 * This program uses a binary search tree to count the number of times
 * that each word appears in a specified text file.
 *
 * Written by: Carl Sable
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Maximum number of characters per word (not including \0)
#define MAX_LENGTH 25

typedef struct node
{
  char word[MAX_LENGTH];
  int count;
  struct node *left;
  struct node *right;
} NODE;

typedef NODE *PNODE;

PNODE insert_word(PNODE, char []);
PNODE create_node(char []);
void inorder_traversal(PNODE);

/* Open file, loop through words, display counts. */
int main(void)
{
  char filename[MAX_LENGTH];
  char word[MAX_LENGTH];
  FILE *fpSource;
  PNODE pTemp, pTree = NULL;

  printf("Enter file name: ");
  scanf("%s", filename);

  if ((fpSource = fopen(filename, "r")) == NULL)
  {
    fprintf(stderr, "Could not open \"%s\" for reading.\n", filename);
    return 1;
  }

  /* Loop through words in file, insert them into binary tree. */
  while (fscanf(fpSource, "%24s", word) != EOF)
    pTree = insert_word(pTree, word);

  fclose(fpSource);

  /* Display results. */
  inorder_traversal(pTree);

  return 0;
}

/* Insert a new word into tree or increase count of word if already present. */
PNODE insert_word(PNODE pTree, char word[])
{
  PNODE pNew, pParent, pCur;

  if (pTree == NULL)
  {
    /* new node is first node inserted. */
    pNew = create_node(word);
    return pNew;
  }

  pParent = NULL;
  pCur = pTree;

  /* Locate position of word in tree. */
  while((pCur != NULL) && strcmp(pCur->word, word) != 0)
  {
    pParent = pCur;
    if (strcmp(pCur->word, word) > 0)
      pCur = pCur->left;
    else
      pCur = pCur->right;
  }

  if ((pCur != NULL) && strcmp(pCur->word, word) == 0)
  {
    /* Word has occured before, increase count. */
    pCur->count = pCur->count + 1;
  }
  else
  {
    /* Insert new node as leaf of tree. */
    pNew = create_node(word);
    if (strcmp(pParent->word, word) > 0)
      pParent->left = pNew;
    else
      pParent->right = pNew;
  }

  return pTree;
}

/* Create a new node for given word. */
PNODE create_node(char word[])
{
  PNODE pTemp;

  pTemp = (PNODE) malloc (sizeof(NODE));
  if (pTemp == NULL)
  {
    fprintf(stderr, "OUT OF MEMORY!\n");
    return NULL;
  }

  /* Copy word into node. */
  strcpy(pTemp->word, word);
  /* First time word occured, so count is 1. */
  pTemp->count = 1;
  pTemp->left = NULL;
  pTemp->right = NULL;

  return pTemp;
}

/* Display words and counts in sorted order. */
void inorder_traversal(PNODE pCur)
{
  if (pCur == NULL)
    return;

  inorder_traversal(pCur->left);
  printf("%-25s%d\n", pCur->word, pCur->count);
  inorder_traversal(pCur->right);
  return;
}
