|
|
|
|
The
|
package inputOutput;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.StreamTokenizer;
/**
 * Small demonstration of {@link StreamTokenizer}: reads a text file and
 * prints one line per token, labelled with the kind of token found.
 */
public class TokenizeStream
{
    /**
     * Main entry point.
     * This expects a single argument:
     * the name of the file to be tokenized.
     * <p>
     * If no argument is supplied, a usage message is written to standard
     * error and the method returns without doing anything else. I/O
     * failures (including a missing file) are reported by printing the
     * stack trace; they are not propagated to the caller.
     *
     * @param args command-line arguments; {@code args[0]} is the file name
     */
    public static void main(String[] args)
    {
        // Guard against a missing argument instead of failing with an
        // ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 1)
        {
            System.err.println("Usage: java TokenizeStream <fileName>");
            return;
        }

        String fileName = args[0];
        System.out.println("Parsing file: " + fileName);

        // try-with-resources closes the reader (and the wrapped FileReader)
        // on every exit path, including when nextToken() throws.
        try (BufferedReader reader =
                 new BufferedReader(
                     new FileReader(fileName)))
        {
            StreamTokenizer st = new StreamTokenizer(reader);
            // Skip both Java/C++ comment styles rather than tokenizing them.
            st.slashSlashComments(true);
            st.slashStarComments(true);

            int token;
            do
            {
                token = st.nextToken();
                switch (token)
                {
                    case StreamTokenizer.TT_NUMBER:
                        System.out.println("Number: " + st.nval);
                        break;
                    case StreamTokenizer.TT_WORD:
                        System.out.println("Word: " + st.sval);
                        break;
                    case StreamTokenizer.TT_EOL:
                        // Only produced when eolIsSignificant(true) has been
                        // requested; it is not requested here.
                        System.out.println("End of line");
                        break;
                    case StreamTokenizer.TT_EOF:
                        System.out.println("End of Stream");
                        break;
                    default:
                        // Any other value is the code of an ordinary character.
                        System.out.println("Character: " +
                            (char)(token));
                        break;
                }
            }
            while (token != StreamTokenizer.TT_EOF);
        }
        catch (FileNotFoundException e)
        {
            e.printStackTrace();
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }
}
When this program is told to parse its own source file, it outputs the following (I've removed many lines from the middle, as the full listing would be far too long):
Parsing file: TokenizeStream.java
Word: package
Word: inputOutput
Character: ;
Word: import
Word: java.io.BufferedReader
Character: ;
Word: import
Word: java.io.FileReader
Character: ;
Word: import
Word: java.io.FileNotFoundException
Character: ;
Word: import
Word: java.io.IOException
Character: ;
Word: import
Word: java.io.StreamTokenizer
Character: ;
Word: public
Word: class
Word: TokenizeStream
Character: {
...
Word: catch
Character: (
Word: FileNotFoundException
Word: e
Character: )
Character: {
Word: e.printStackTrace
Character: (
Character: )
Character: ;
Character: }
Word: catch
Character: (
Word: IOException
Word: e
Character: )
Character: {
Word: e.printStackTrace
Character: (
Character: )
Character: ;
Character: }
Character: }
Character: }
End of Stream
You can use the StreamTokenizer
class relatively simply, as above. However, many applications
demand more sophisticated usage, where you specify the
attributes of word characters, etc. Details are left to the
imaginative reader...
| The page was last updated February 19, 2008 |