Monthly Archives: September 2018

Word Counting Function Implemented in C

I recently completed an assignment for my Modern Operating Systems class where the teacher asked us to develop a program written in C that counts the number of words in a text file that is given as the first parameter. I haven’t used the C language in over 15 years, so this was an exciting challenge. Especially because I had to implement linked lists in C. My previous experience with linked lists was in OCaml and C# where they were basic types that were included in the languages. Needing to implement them myself really solidified my knowledge of them.

My next assignment is to create a simple shell that only uses the following functions:

  • malloc
  • free
  • exit
  • fgetc
  • fputc
  • fputs
  • fprintf
  • sprintf
  • snprintf
  • vsprintf
  • vsnprintf
  • gettimeofday
  • strerror

It is pretty exciting that we will be developing an operating system for an ARM processor as the project in our class. This will be a challenge I’ve wanted to tackle for a while. I’ve been reading the source to bash and csh, something I would not have thought to do previously. I don’t fully understand everything in the source, but I’m definitely seeing how it is put together and various properties like file descriptors and pipes are handled helps me understand the types of data structures I may want to use.

If I had to do this assignment over again, I would have implemented linked lists for every character array so the lengths of “strings” (character arrays) could be bound only by memory limitations. Right now the code also would split a word longer than 4096 characters into two words (and not leave the 4096 character word null terminated which is a no-no).

//includes
#include <stdio.h>
#include <stdlib.h> // For exit() function
#include <unistd.h> //for F_OK, https://www.gnu.org/software/libc/manual/html_node/Testing-File-Access.html
#include <string.h> //for string.h

//macro definitions
#define FILENAME_POSITION 1
#define MAX_WORD_LENGTH 256
#define DEBUG_MODE 0

//type definitions
//I learned how to use linked lists in c from here: https://www.zentut.com/c-tutorial/c-linked-list/
typedef struct node{
	int count;
	char word[MAX_WORD_LENGTH];
	struct node *next;
} node_t;

//function prototypes
int main(int argc, char *argv[]);
int validate_input(int argc, char *argv[]);
void display_usage();
void process_file(char *argv[]);
void output_line(char line[], int data);
void push(node_t * head, int count, char word[]);
void print_list(node_t * head);

//function definitions
int main(int argc, char *argv[])
{
	validate_input(argc, argv);
	process_file(argv);
	return(0);
}

int validate_input(int argc, char *argv[])
{
	if(argc < 2) { display_usage(); exit(-3); } //https://stackoverflow.com/a/230068/1330074 if(access(argv[FILENAME_POSITION], F_OK) != -1) { //puts("file exists!"); } else { puts("file doesn't exist!"); exit(-2); } return 0; } void display_usage() { puts("word-count usage information:\nexample: word-count filename.txt"); } void process_file(char *argv[]) { FILE *fp; //i would like to implement this buffer as a linked list so there wouldn't be this hardcoded limit but time did not allow int buffer_size = 4096; char line[buffer_size]; fp = fopen(argv[FILENAME_POSITION], "r"); if(fp == NULL) { perror("Error opening file! Exiting program"); exit(-1); } int number_of_lines = 0; node_t* head = NULL; head = malloc(sizeof(node_t)); if (head == NULL) { puts("Unable to create head node! Terminating."); exit(-5); } while(fgets (line, buffer_size, fp) != NULL) { //fputs(line, stdout); char *token; //learned to use strtok from https://www.tutorialspoint.com/c_standard_library/c_function_strtok.htm token = strtok(line, " \n\r\t"); while( token != NULL ) { //printf( " %s\n", token ); push(head, 1, token); token = strtok(NULL, " \n\r\t"); } number_of_lines++; } output_line("Lines: ", number_of_lines); print_list(head); fclose(fp); //output_line("Lines: ", count(first)); } void output_line(char line[], int data) { printf("%s ", line); printf("%d\n", data); } void push(node_t * head, int count, char word[]) { if(DEBUG_MODE == 1) { printf("Now adding %s to the linked list...\n", word); } node_t * current = head; int word_already_in_list = 0; while (current->next != NULL)
    {
		if(strcmp(current->word, word) == 0)
		{
		    if(DEBUG_MODE == 1)
			{
				printf("%s is equal to %s, updating the count!\n", word, current->word);
			}
			
			word_already_in_list = 1;
			current->count++;
			break;
		}
		else
		{
		    if(DEBUG_MODE == 1)
			{
				printf("%s is NOT equal to %s, still looking...\n", word, current->word);
			}

		}

        current = current->next;
    }

	if(strcmp(current->word, word) == 0)
	{
	    if(DEBUG_MODE == 1)
		{
			printf("%s is equal to %s, updating the count!\n", word, current->word);
		}
		
		word_already_in_list = 1;
		current->count++;
	}
	else
	{
	    if(DEBUG_MODE == 1)
		{
			printf("%s is NOT equal to %s, need to add it to the list!\n", word, current->word);
		}
	}

	if(word_already_in_list == 0)
	{
	    current->next = malloc(sizeof(node_t));
	    current->next->count = count;
		strcpy(current->next->word, word);
	    current->next->next = NULL;
	}

}

void print_list(node_t * head)
{
    node_t * current = head;
    unsigned int number_of_words = 0;
    puts("Word count:");
    while (current != NULL)
    {
        if(current->count != 0)
        {
	        printf("%s: %d\n", current->word, current->count);
	        number_of_words = number_of_words + current->count;
    	}
    	current = current->next;
    }

    printf("Words: %d\n", number_of_words);
}