You can write a state machine that should handle most cases.
As you scan the file, you’ll be in one of the following states:
- TEXT – regular (non-commented) text; this is the state you’ll start in. Any newline seen in this state will cause the total-lines counter to be incremented.
- SAW_SLASH – You’ve seen a single `/`, which may be the start of a single- or multi-line comment. If the next character is a `/`, you’ll go into the SINGLE_COMMENT state. If the next character is a `*`, you’ll go into the MULTI_COMMENT state. For any other character, you go back into the TEXT state.
- SINGLE_COMMENT – You’ve seen the `//` token; you will stay in this state until you see a newline character. Once you see the newline character you’ll increment the number of single-line comments as well as total lines, and go back to the TEXT state.
- MULTI_COMMENT – You’ve seen the `/*` token; you will stay in this state until you see the next `*/` token. Any newline you see in this state will cause the multi-comment line counter to be incremented along with the total lines.
- SAW_STAR – While in the MULTI_COMMENT state, you’ve seen a single `*`. If the next character is `/`, you’ll go back to the TEXT state. If the next character is `*`, you’ll stay in the SAW_STAR state. Otherwise you’ll go back to the MULTI_COMMENT state.
There are edge cases that I’m not dealing with (such as encountering an EOF while in a comment state), but the following should be a reasonable example of how you can do stuff like this.
Note that nested comments won’t be counted; i.e., if a `//`-delimited comment appears within a `/* */`-delimited comment, only the multi-comment counter will be updated.
You will probably want to factor the counting logic into its own function; just trying to keep the example as straightforward as I can.
#include <stdio.h>
#include <stdlib.h>
/**
 * Count up the number of total lines, single-comment lines,
 * and multi-comment lines in a file.
 *
 * Usage: <program> <filename>
 *
 * The file named by argv[1] is read one character at a time and fed
 * through a five-state machine (TEXT, SAW_SLASH, SAW_STAR,
 * SINGLE_COMMENT, MULTI_COMMENT).  Counters are bumped on newlines and
 * on the closing token of a block comment.
 *
 * Known limitations:
 *  - comment markers inside string or character literals are treated
 *    as real comments, because the TEXT state only looks for a slash
 *    and a newline;
 *  - a comment that is cut off by end-of-file (no trailing newline or
 *    no closing token) is not counted.
 *
 * Returns 0 after printing the three counters; terminates with
 * EXIT_FAILURE if no filename was supplied or the file cannot be opened.
 */
int main(int argc, char **argv)
{
    FILE *fp;
    int c;
    unsigned int total = 0;   // every newline seen, in any state
    unsigned int multi = 0;   // lines belonging to block comments
    unsigned int single = 0;  // lines ended while inside a line comment
    enum states { TEXT,
                  SAW_SLASH,
                  SAW_STAR,
                  SINGLE_COMMENT,
                  MULTI_COMMENT } state = TEXT;

    if ( argc < 2 )
    {
        fprintf(stderr, "USAGE: %s <filename>\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    fp = fopen( argv[1], "r" );
    if ( !fp )
    {
        fprintf(stderr, "Cannot open file %s\n", argv[1] );
        exit(EXIT_FAILURE);
    }

    while ( (c = fgetc( fp )) != EOF )
    {
        switch( state )
        {
            case TEXT :
                switch( c )
                {
                    case '/'  : state = SAW_SLASH; break;
                    case '\n' : total++; // fall through
                    default   : break;
                }
                break;

            case SAW_SLASH :
                // One slash seen; the next character decides whether a
                // comment actually starts here.
                switch( c )
                {
                    case '/'  : state = SINGLE_COMMENT; break;
                    case '*'  : state = MULTI_COMMENT; break;
                    case '\n' : total++; // fall through
                    default   : state = TEXT; break;
                }
                break;

            case SAW_STAR :
                // Inside a block comment and the previous character was a star.
                switch( c )
                {
                    // Closing token: count the line the comment ends on;
                    // its newline is later counted in TEXT for `total` only.
                    case '/'  : state = TEXT; multi++; break;
                    // A run of stars keeps us waiting for the closing slash.
                    case '*'  : break;
                    case '\n' : total++; multi++; // fall through
                    default   : state = MULTI_COMMENT; break;
                }
                break;

            case SINGLE_COMMENT :
                switch( c )
                {
                    case '\n' : state = TEXT; total++; single++; // fall through
                    default   : break;
                }
                break;

            case MULTI_COMMENT :
                switch( c )
                {
                    case '*'  : state = SAW_STAR; break;
                    case '\n' : total++; multi++; // fall through
                    default   : break;
                }
                break;

            default: // NOT REACHABLE
                break;
        }
    }

    fclose(fp);

    printf( "File : %s\n", argv[1] );
    printf( "Total lines : %8u\n", total );
    printf( "Single-comment lines : %8u\n", single );
    printf( "Multi-comment lines : %8u\n", multi );
    return 0;
}
EDIT
Here’s a table-driven equivalent to the program above. I create a `state` table to control state transitions and an `action` table to control what happens when I change state.
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
/**
* Using preprocessor macros instead of enums, per request; normally
* I would use enums, since they obey scoping rules and
* show up in debuggers.
*/
#define TEXT 0
#define SAW_SLASH 1
#define SAW_STAR 2
#define SINGLE_COMMENT 3
#define MULTI_COMMENT 4
#define TOTAL_STATES 5
#define NO_ACTION 0
#define INC_TOTAL 1
#define INC_SINGLE 2
#define INC_MULTI 4
/**
* This example assumes 7-bit ASCII, for a total of
* 128 character encodings. You'll want to change this
* to handle other encodings.
*/
#define ENCODINGS 128
/**
* Need a state table to control state transitions and an action
* table to specify what happens on a transition. Each table
* is indexed by the state and the next input character.
*/
/*
 * Both tables have static storage duration, so every element starts out
 * as 0 -- TEXT in the state table and NO_ACTION in the action table --
 * until the init functions fill in the non-default entries.
 */
static int state[TOTAL_STATES][ENCODINGS]; // next state, indexed by [current state][input character]
static int action[TOTAL_STATES][ENCODINGS]; // action flags, indexed by [current state][input character]
/**
 * Initialize our state table.
 *
 * After this call, state[s][ch] holds the state the scanner moves to
 * when it reads character ch while in state s.
 *
 * @param state  transition table with TOTAL_STATES rows of ENCODINGS
 *               columns; every entry is written, so the previous
 *               contents do not matter.
 */
void initState( int (*state)[ENCODINGS] )
{
    /**
     * Default transition for every input character:
     *  - TEXT and SAW_SLASH fall back to TEXT,
     *  - SAW_STAR falls back to MULTI_COMMENT (the star did not
     *    start a closing token),
     *  - SINGLE_COMMENT and MULTI_COMMENT stay where they are.
     */
    for ( size_t i = 0; i < ENCODINGS; i++ )
    {
        state[TEXT][i]           = TEXT;
        state[SAW_SLASH][i]      = TEXT;
        state[SAW_STAR][i]       = MULTI_COMMENT;
        state[SINGLE_COMMENT][i] = SINGLE_COMMENT;
        state[MULTI_COMMENT][i]  = MULTI_COMMENT;
    }

    /**
     * If we're in the TEXT state and see a '/' character, move to the
     * SAW_SLASH state.
     */
    state[TEXT]['/'] = SAW_SLASH;

    /**
     * If we're in the SAW_SLASH state, a second slash opens a line
     * comment and a star opens a block comment.
     */
    state[SAW_SLASH]['/'] = SINGLE_COMMENT;
    state[SAW_SLASH]['*'] = MULTI_COMMENT;

    /**
     * Remaining exceptions to the defaults: a slash after a star closes
     * the block comment, repeated stars keep waiting for that slash,
     * a newline ends a line comment, and a star inside a block comment
     * may be the start of the closing token.
     */
    state[SAW_STAR]['/']         = TEXT;
    state[SAW_STAR]['*']         = SAW_STAR;
    state[SINGLE_COMMENT]['\n']  = TEXT;
    state[MULTI_COMMENT]['*']    = SAW_STAR;
}
/**
 * Initialize our action table
 *
 * After this call, action[s][ch] holds the bitwise OR of the INC_*
 * flags to apply when character ch is read while in state s.
 * Only the non-default entries are written here; all other entries are
 * left as they were, so the table passed in must already be zero-filled
 * (NO_ACTION).
 *
 * @param action  action table with TOTAL_STATES rows of ENCODINGS columns.
 */
void initAction( int (*action)[ENCODINGS] )
{
    /* Newlines outside of any comment only advance the total line count. */
    action[TEXT]['\n']           = INC_TOTAL;
    action[SAW_SLASH]['\n']      = INC_TOTAL;

    /* Closing a block comment counts the line the comment ends on. */
    action[SAW_STAR]['/']        = INC_MULTI;

    /* Newlines inside a comment count for that comment type and the total. */
    action[MULTI_COMMENT]['\n']  = INC_MULTI | INC_TOTAL;  // Multiple actions are bitwise-OR'd
    action[SINGLE_COMMENT]['\n'] = INC_SINGLE | INC_TOTAL; // together

    /*
     * A newline right after a star is still inside the block comment.
     * Without this entry a comment line ending in a star would be
     * missing from both the total and the multi-comment counts.
     */
    action[SAW_STAR]['\n']       = INC_MULTI | INC_TOTAL;
}
/**
* Scan the input file for comments
*/
void countComments( FILE *stream, size_t *totalLines, size_t *single, size_t *multi )
{
*totalLines = *single = *multi = 0;
int c;
int curState = TEXT, curAction = NO_ACTION;
while ( ( c = fgetc( stream ) ) != EOF )
{
curAction = action[curState][c]; // Read the action before we overwrite the state
curState = state[curState][c]; // Get the new state (which may be the same as the old state)
if ( curAction & INC_TOTAL ) // Execute the action.
(*totalLines)++;
if ( curAction & INC_SINGLE )
(*single)++;
if ( curAction & INC_MULTI )
(*multi)++;
}
}
/**
 * Program entry point: expects the name of the file to scan as the
 * first command-line argument, runs the table-driven scanner over it,
 * and prints the total, single-comment, and multi-comment line counts.
 *
 * Returns EXIT_SUCCESS after printing the report, or EXIT_FAILURE when
 * no filename was given or the file could not be opened.
 */
int main( int argc, char **argv )
{
    /* A filename argument is mandatory. */
    if ( argc < 2 )
    {
        fprintf( stderr, "USAGE: %s <filename>\n", argv[0] );
        return EXIT_FAILURE;
    }

    const char *path = argv[1];

    FILE *input = fopen( path, "r" );
    if ( input == NULL )
    {
        fprintf( stderr, "Cannot open file %s\n", path );
        return EXIT_FAILURE;
    }

    /* The file is readable, so set up the scanner's lookup tables. */
    initState( state );
    initAction( action );

    /* Run the scan; the counters are reset by countComments itself. */
    size_t lineCount;
    size_t singleCount;
    size_t multiCount;
    countComments( input, &lineCount, &singleCount, &multiCount );
    fclose( input );

    /* Report. */
    printf( "File : %s\n", path );
    printf( "Total lines : %zu\n", lineCount );
    printf( "Single-comment lines : %zu\n", singleCount );
    printf( "Multi-comment lines : %zu\n", multiCount );

    return EXIT_SUCCESS;
}
Running the file on itself gives us
$ ./comment_counter comment_counter.c
File : comment_counter.c
Total lines : 150
Single-comment lines : 7
Multi-comment lines : 42
which I think is right. This has all the same weaknesses as the first version, just in a different form.
5
solved C Program to count comment lines (// and /* */)