4
4
* 6 characters, but different somewhere thereafter. Don't count words within
5
5
* string and comments. Make 6 a parameter that can be set from the command
6
6
* line.
7
- * Note: getword comment detection could be improved.
7
+ *
8
8
* By Faisal Saadatmand
9
9
*/
10
10
11
- #include <stdio.h>
12
11
#include <ctype.h>
12
+ #include <stdio.h>
13
+ #include <stdlib.h> /* for malloc && atoi */
13
14
#include <string.h>
14
- #include <stdlib.h> /* for malloc && atoi */
15
15
16
16
#define MAXWORD 100
17
17
#define BUFSIZE 100
18
- #define NKEYS (sizeof keytab / sizeof keytab[0])
19
- #define NSYMBOLS (sizeof symbol / sizeof symbol[0])
20
-
21
- /* functions */
22
- struct tnode * addtree (struct tnode * , char * );
23
- void treeprint (struct tnode * );
24
- int getword (char * , int );
25
- struct tnode * talloc (void ); /* alocate memory to new tree node */
26
- char * strDup (char * ); /* copy string into safe place */
27
- struct key * binsearch (char * , struct key * , int );
28
- void findVariables (struct tnode * , int );
29
- struct tnode * freetree (struct tnode * );
30
-
31
- /* globals */
32
- int buf [BUFSIZE ]; /* buffer from ungetch */
33
- int bufp = 0 ; /* next free position in buf */
34
-
35
- struct tnode { /* the tree node: */
36
- char * word ; /* points to the text */
37
- int count ; /* number of occurrences */
38
- int match ; /* matching word */
39
- struct tnode * left ; /* left child */
40
- struct tnode * right ; /* right child */
18
+ #define NKEYS (int) (sizeof keytab / sizeof keytab[0])
19
+
20
+ /* types */
21
+ struct tnode { /* the tree node: one distinct word read from the input */
22
+ char * word ; /* points to the heap copy of the word made by strDup */
23
+ int match ; /* flag: nonzero once the word shares its first n characters with another word in the tree */
24
+ struct tnode * left ; /* left child: words that compare less than word */
25
+ struct tnode * right ; /* right child: words that compare greater than word */
41
26
};
42
27
43
28
struct key {
44
29
char * word ;
45
30
int count ;
46
31
};
47
32
33
/* functions */
int getword(char *, int);
struct key *binsearch(char *, struct key *, int);
struct tnode *addtree(struct tnode *, char *, size_t n);
struct tnode *talloc(void);	/* allocate memory for a new tree node */
char *strDup(char *);		/* copy string into safe place */
void checkmatch(char *, struct tnode *, size_t, int *);
void printree(struct tnode *);	/* name matches the definition and its callers */
void freetree(struct tnode *);
42
+
43
+ /* globals */
44
+ int buf [BUFSIZE ]; /* buffer from ungetch */
45
+ int bufp = 0 ; /* next free position in buf */
46
+
48
47
struct key keytab [] = {
49
48
{ "auto" , 0 },
50
49
{ "break" , 0 },
@@ -80,141 +79,71 @@ struct key keytab[] ={
80
79
{ "while" , 0 },
81
80
};
82
81
83
- struct key symbol [] = { /* array is sorted for binary search */
84
- { "\"" , 0 },
85
- { "#" , 0 },
86
- { "*" , 0 },
87
- { "/" , 0 },
88
- { "\\" , 0 },
89
- { "_" , 0 },
90
- };
91
-
92
82
/* addtree: add a node with w, at or below p */
93
- struct tnode * addtree (struct tnode * p , char * w )
83
+ struct tnode * addtree (struct tnode * p , char * w , size_t n )
94
84
{
95
85
int cond ;
86
+ static int found ;
96
87
97
- if (p == NULL ) { /* a new word has arrived */
98
- p = talloc (); /* make a new node */
99
- p -> word = strDup (w ); /* copy data to it */
100
- p -> count = 1 ;
101
- p -> match = 0 ; /* initialize match */
88
+ if (!p ) { /* a new word has arrived */
89
+ p = talloc (); /* make a new node */
90
+ p -> word = strDup (w ); /* copy data to it */
91
+ p -> match = * (& found ); /* p->match = value pointed to by &found */
102
92
p -> left = p -> right = NULL ;
103
- } else if ((cond = strcmp (w , p -> word )) == 0 )
104
- p -> count ++ ; /* repeated word */
105
- else if (cond < 0 ) /* less thant into left subtree */
106
- p -> left = addtree (p -> left , w );
107
- else
108
- p -> right = addtree (p -> right , w );
109
-
110
- return p ;
111
- }
112
-
113
- /* treeprint: in-order print of tree p */
114
- void treeprint (struct tnode * p )
115
- {
116
- if (p != NULL ) {
117
- treeprint (p -> left );
118
- if (p -> match > 0 )
119
- printf ("%4d %4d %s\n" , p -> count , p -> match , p -> word );
120
- treeprint (p -> right );
93
+ } else if ((cond = strcmp (w , p -> word )) < 0 ) { /* less than ? */
94
+ checkmatch (w , p , n , & found );
95
+ p -> left = addtree (p -> left , w , n ); /* go left */
96
+ } else if (cond > 0 ) { /* greater than */
97
+ checkmatch (w , p , n , & found );
98
+ p -> right = addtree (p -> right , w , n ); /* go right */
121
99
}
100
+ found = 0 ; /* reset */
101
+ return p ;
122
102
}
123
103
124
- int getch (void ) /* get a (possibly pushed back) character */
104
+ /* checkmatch: set current node's flag variable and the found variable to 1, if
105
+ * w matches a word in the tree */
106
+ void checkmatch (char * w , struct tnode * p , size_t n , int * found )
125
107
{
126
- return (bufp > 0 ) ? buf [-- bufp ] : getchar ();
108
+ if (!strncmp (w , p -> word , n )) /* is w a match? */
109
+ p -> match = * found = 1 ; /* mark the current and the next nodes */
127
110
}
128
111
129
- void ungetch (int c ) /* push character back on input */
112
+ /* printree: in-order print of tree p */
113
+ void printree (struct tnode * p )
130
114
{
131
- if (bufp >= BUFSIZE )
132
- printf ("ungetch: too many characters\n" );
133
- else
134
- buf [bufp ++ ] = c ;
135
- }
136
-
137
- /* getword: get next word or character from input */
138
- int getword (char * word , int lim )
139
- {
140
- int c , getch (void );
141
- void ungetch (int );
142
- char * w = word ;
143
- struct key * p ;
144
-
145
- while (isspace (c = getch ()))
146
- ;
147
-
148
- if (c != EOF ) {
149
- * w ++ = c ;
150
- * w = '0円' ;
151
- } else
152
- return c ;
153
-
154
- if (!isalpha (c ) && (p = binsearch (word , symbol , NSYMBOLS )) == NULL )
155
- return c ;
156
-
157
- switch (c ) {
158
- case '\\' : /* handle escape sequences */
159
- c = getch ();
160
- break ;
161
- case '\"' : /* skip words inside string constant */
162
- while ((c = getch ()) != '\"' )
163
- if (c == EOF )
164
- return c ;
165
- break ;
166
- case '#' : /* skip preprocessor control lines */
167
- while ((c = getch ()) != '\n' )
168
- ;
169
- ungetch (c );
170
- break ;
171
- case '/' : /* skip words inside C comments */
172
- if ((c = getch ()) == '*' ) {
173
- while ((c = getch ()))
174
- if (c == '*' && (c = getch ()) == '/' )
175
- break ;
176
- else if (c == EOF )
177
- return c ;
178
- } else /* don't skip pointer variables */
179
- ungetch (c );
180
- break ;
181
- default :
182
- for ( ; -- lim > 0 ; w ++ )
183
- if (!isalnum (* w = getch ()) && * w != '_' ) {
184
- ungetch (* w );
185
- break ;
186
- }
187
- break ;
188
- }
189
-
190
- * w = '0円' ;
191
- return word [0 ];
115
+ if (!p ) /* exist condition */
116
+ return ;
117
+ printree (p -> left );
118
+ if (p -> match )
119
+ printf (" %s\n" , p -> word );
120
+ printree (p -> right );
192
121
}
193
122
194
123
/* talloc: make a tnode */
195
124
struct tnode * talloc (void )
196
125
{
197
- return ( struct tnode * ) malloc (sizeof (struct tnode ));
126
+ return malloc (sizeof (struct tnode ));
198
127
}
199
128
200
129
/* freetree: free allocated heap memory of node tree */
201
- struct tnode * freetree (struct tnode * node )
130
+ void freetree (struct tnode * node )
202
131
{
203
- if (node != NULL ) {
204
- freetree (node -> left );
205
- freetree (node -> right );
206
- free (node -> word );
207
- free (node );
208
- }
209
- return node ;
132
+ if (!node )
133
+ return ;
134
+ freetree (node -> left );
135
+ freetree (node -> right );
136
+ free (node -> word );
137
+ free (node );
210
138
}
139
+
211
140
/* strDup: make a duplicate of s in freshly allocated memory; returns the copy,
 * or NULL when malloc fails. The caller owns the result and releases it with
 * free(). */
char *strDup(char *s)
{
	size_t size = strlen(s) + 1;	/* +1 for the terminating '\0' */
	char *copy = malloc(size);

	if (copy != NULL)
		memcpy(copy, s, size);
	return copy;
}
@@ -239,49 +168,68 @@ struct key *binsearch(char *word, struct key *tab, int n)
239
168
return NULL ;
240
169
}
241
170
242
- /* findVariables: finds matching variables in a binary search tree, using LDR
243
- * (inorder) traversal */
244
- void findVariables (struct tnode * p , int n )
171
/* getword: get next word or character from input
 *
 * Skips leading white space, then stores one NUL-terminated token in word
 * (at most lim - 1 characters; lim must be at least 2):
 *   - a word: a letter, '_' or '#' followed by letters, digits and '_';
 *   - otherwise the single character that was read.
 * Character constants, string constants and C comments are consumed so the
 * words inside them are never reported; only their opening character is left
 * in word. A '/' that does not start a comment is returned on its own and the
 * character after it stays in the input.
 *
 * Returns the first character of the token, or EOF at end of input, including
 * when the input ends inside a constant or a comment.
 */
int getword(char *word, int lim)
{
	int c, next, getch(void);
	void ungetch(int);
	char *w = word;

	while (isspace(c = getch()))
		;
	if (c != EOF)
		*w++ = c;
	if (isalpha(c) || c == '_' || c == '#') {
		/* one character is already stored; stop at lim - 1 characters so
		 * the terminator below always fits inside word */
		for ( ; --lim > 1; ++w) {
			next = getch();	/* kept in an int so EOF survives */
			if (!isalnum(next) && next != '_') {
				ungetch(next);
				break;
			}
			*w = next;
		}
	} else if (c == '\'' || c == '\"') {	/* skip character and string constants */
		int quote = c;

		while ((c = getch()) != quote && c != EOF)
			if (c == '\\' && (c = getch()) == EOF)	/* drop the escaped character */
				break;
	} else if (c == '/') {
		if ((next = getch()) != '*')
			ungetch(next);	/* plain '/': give the next character back */
		else {			/* skip comments */
			int prev = 0;	/* previous character inside the comment */

			while ((c = getch()) != EOF) {
				if (prev == '*' && c == '/')
					break;
				prev = c;
			}
		}
	}
	*w = '\0';
	return c;
}
257
202
258
- /* word frequency count */
259
- int main ( int argc , char * argv [] )
203
+ /* get a (possibly pushed back) character */
204
+ int getch ( void )
260
205
{
261
- struct tnode * root ; /* root node */
262
- struct key * p ; /* currently searched word */
263
- char word [MAXWORD ]; /* currently read word */
264
- int nChar ; /* number of characters to match */
206
+ return (bufp > 0 ) ? buf [-- bufp ] : getchar ();
207
+ }
265
208
266
- if (argc != 2 )
267
- nChar = 6 ;
209
+ /* push character back on input */
210
+ void ungetch (int c )
211
+ {
212
+ if (bufp >= BUFSIZE )
213
+ printf ("ungetch: too many characters\n" );
268
214
else
269
- nChar = atoi (argv [1 ]);
215
+ buf [bufp ++ ] = c ;
216
+ }
270
217
271
- root = NULL ; /* initialize root node */
272
- while (getword (word , MAXWORD ) != EOF )
273
- if ((isalpha (word [0 ]) || word [0 ] == '_' || word [0 ] == '*' )
274
- && (int ) strlen (word ) > nChar ) {
275
- if ((p = binsearch (word , keytab , NKEYS )) == NULL ) /* skip C */
276
- root = addtree (root , word ); /* reserved words */
277
- else
278
- ++ p -> count ; /* not necessary */
279
- }
280
- findVariables (root , nChar );
281
- treeprint (root );
282
- root = freetree (root ); /* clean up */
218
+ int main (int argc , char * argv [])
219
+ {
220
+ struct tnode * root ; /* root node */
221
+ char word [MAXWORD ]; /* currently read word */
222
+ size_t nChar ; /* number of characters to match */
283
223
284
- for (size_t i = 0 ; i < sizeof (keytab ) / sizeof (keytab [0 ]); ++ i )
285
- printf ("%s %i\n" , keytab [i ].word , keytab [i ].count );
224
+ nChar = (-- argc == 1 ) ? atoi (* ++ argv ) : 6 ; /* Note: no input error check */
225
+ root = NULL ;
226
+ while (getword (word , MAXWORD ) != EOF )
227
+ if ((isalpha (word [0 ]) || word [0 ] == '_' ) && strlen (word ) >= nChar &&
228
+ !binsearch (word , keytab , NKEYS )) /* skip reserved words */
229
+ root = addtree (root , word , nChar );
230
+ printree (root );
231
+ /* clean up */
232
+ freetree (root );
233
+ root = NULL ;
286
234
return 0 ;
287
235
}
0 commit comments