Parser
Home ] Up ] Token ] [ Parser ] Exceptions ] Results ]

 

 

Here's the source code for the expression parser:

package parser;

import java.io.IOException;
import java.io.StreamTokenizer;
import java.io.StringReader;

/**
  This parser supports an expression represented by
  the following grammar:

   expression = term {("+"|"-") term}

   term = factor {("*"|"/"|"%") factor}

   factor = subfactor ["^" subfactor]

   subfactor = ["+"|"-"] subexpression
 
   subexpression = "(" expression ")" | atom

   atom = number

*/

/**
 * Class to implement a recursive-descent expression parser.
 * 
 * @author Bryan J. Higgs
 */
public class Parser
{
  /**
   * Constructor
   */
  public Parser()
  {}
  
  /**
   * Gets the expression string being parsed.
   */
  public String getExpression()
  {
    return m_expression;
  }

  /**
   * Gets the current token for the parse
   * (changes as the parse state changes)
   */
  public Token getCurrentToken()
  {
    return m_currentToken;
  }
  
  /**
   * Top-level parse entry point
   */
  public double parse(String expression) throws ParserException
  {
    System.err.println("\nStarting parse on [" + expression + "]...");

    m_expression = expression;    
    m_reader = new StringReader(expression);
    try
    {
      m_tokenizer = new StreamTokenizer(m_reader);
      m_tokenizer.ordinaryChar('/'); 
                   // Ensure that '/' are not considered comment chars
      //m_tokenizer.slashSlashComments(true);
      //m_tokenizer.slashStarComments(true);
      //m_tokenizer.eolIsSignificant(false);

      nextToken(); // Get the first token from the input.

      double result = parseExpression(); // Parse the expression until the end.

      if (m_currentToken.getType() != Token.TYPE.END_OF_INPUT)
      {
          throw new UnexpectedExtraTokenException(m_currentToken, 
                                                  m_expression);
      }

      System.err.println("Value of [" + expression + "] : " + result);
      return result;
    }
    finally
    {
      // Close the reader, regardless
      m_reader.close();
    }
  }

  //// Private methods ////
  
  /**
   * Returns the next token encountered in the expression
   */
  private void nextToken() throws ParserException
  {
    // Set token to unknown, in case something goes awry
    m_currentToken.setType(Token.TYPE.UNDEFINED);
    m_currentToken.setValue("");
    
    try
    {
      int token = m_tokenizer.nextToken();
      switch (token)
      {
        case StreamTokenizer.TT_NUMBER:
          System.err.println("Token: [Number] " + m_tokenizer.nval);
          m_currentToken.setType(Token.TYPE.NUMBER);
          m_currentToken.setValue("" + m_tokenizer.nval);          
          break;
          
        case StreamTokenizer.TT_EOF:
          // We came to the end of the stream
          System.err.println("Token: [END_OF_INPUT]");
          m_currentToken.setType(Token.TYPE.END_OF_INPUT);
          m_currentToken.setValue("");
          break;
          
        case '+':
          System.err.println("Token: [PLUS]");
          m_currentToken.setType(Token.TYPE.PLUS);
          m_currentToken.setValue("+");
          break;
        case '-':
          System.err.println("Token: [MINUS]");
          m_currentToken.setType(Token.TYPE.MINUS);
          m_currentToken.setValue("-");
          break;
        case '*':
          System.err.println("Token: [MULTIPLY]");
          m_currentToken.setType(Token.TYPE.MULTIPLY);
          m_currentToken.setValue("*");
          break;
        case '/':
          System.err.println("Token: [DIVIDE]");
          m_currentToken.setType(Token.TYPE.DIVIDE);
          m_currentToken.setValue("/");
          break;
        case '%':
          System.err.println("Token: [REMAINDER]");
          m_currentToken.setType(Token.TYPE.REMAINDER);
          m_currentToken.setValue("%");
          break;
        case '^':
          System.err.println("Token: [EXPONENTIATION]");
          m_currentToken.setType(Token.TYPE.EXPONENTIATION);
          m_currentToken.setValue("^");
          break;
        case '(':
          System.err.println("Token: [OPEN_PAREN]");
          m_currentToken.setType(Token.TYPE.OPEN_PAREN);
          m_currentToken.setValue("(");
          break;
        case ')':
          System.err.println("Token: [CLOSE_PAREN]");
          m_currentToken.setType(Token.TYPE.CLOSE_PAREN);
          m_currentToken.setValue(")");
          break;
          
        case StreamTokenizer.TT_WORD:
          m_currentToken.setValue(m_tokenizer.sval);
          throw new UnsupportedFeatureException(m_currentToken, m_expression,
                                                "Variable name");
          // break;
          
        default:
          System.err.println("Stream Token: " + token + 
                             " '" + (char) (token) + "'");
          break;
      }
    
      // If we failed to get a valid token, report it...
      if (m_currentToken.getType() == Token.TYPE.UNDEFINED)
      {
          throw new UnexpectedTokenFoundException(m_currentToken,
                                                  m_expression);
      }
    }
    catch (IOException ioe)
    {
      throw new ParserIOException(getExpression(), 0, ioe);
    }
  }

  /**
   * Parse an expression
   * expression = term { ("+"|"-") term }
   */
  private double parseExpression() throws ParserException
  {
    System.err.println("In expression...");
    double result = parseTerm();
    System.err.println("Back in expression...");

    for ( Token.TYPE type = m_currentToken.getType();
          ( type == Token.TYPE.PLUS || type == Token.TYPE.MINUS );
          type = m_currentToken.getType()
        )
    {
        nextToken();

        double temp = parseTerm();
        System.err.println("Back in expression...");
        
        switch (type)
        {
        case PLUS:
            result += temp;
            break;
        case MINUS:
            result -= temp;
            break;
        }
    }
    return result;
  }

  /**
   * Parse a term
   * term = factor { ("*"|"/"|"%") factor }
   */
  private double parseTerm() throws ParserException
  {
    System.err.println("In term...");
    double result = parseFactor();
    System.err.println("Back in term...");

    for ( Token.TYPE type = m_currentToken.getType();
          ( type == Token.TYPE.MULTIPLY ||
            type == Token.TYPE.DIVIDE || type == Token.TYPE.REMAINDER );
          type = m_currentToken.getType()
        )
    {
        nextToken();

        double temp = parseFactor();
        System.err.println("Back in term...");
        
        switch (type)
        {
        case MULTIPLY:
            result *= temp;
            break;
        case DIVIDE:
            if (temp == 0.0)
            {
                throw new DivisionByZeroException( m_expression,
                                                   0 // Offset
                                                 );
            }
            result /= temp;
            break;
        case REMAINDER:
            if (temp == 0.0)
            {
                throw new DivisionByZeroException( m_expression,
                                                   0 // Offset
                                                 );
            }
            result = (long) result % (long) temp;
            break;
        }
    }

    return result;
  }

  /**
   * Parse a factor
   * factor = subfactor ["^" subfactor]
   */
  private double parseFactor() throws ParserException
  {
    System.err.println("In factor...");
    double result = parseSubfactor();
    System.err.println("Back in factor...");
    
    Token.TYPE type = m_currentToken.getType();
    if ( type == Token.TYPE.EXPONENTIATION )
    {
        nextToken();

        double temp = parseSubfactor();
        System.err.println("Back in factor...");

        result = Math.pow(result, temp);
    }
    return result;
  }

  /**
   * Parse a 'subfactor'
   * subfactor = ["+"|"-"] subexpression
   */
  private double parseSubfactor() throws ParserException
  {
    System.err.println("In subfactor...");
    
    Token.TYPE type = m_currentToken.getType();
    if ( type == Token.TYPE.PLUS || type == Token.TYPE.MINUS )
    {
        nextToken();
    }

    double result = parseSubexpression();
    System.err.println("Back in subfactor...");
    
    switch (type)
    {
    case MINUS:
        result = -result;
        break;
    case PLUS:
        // Do nothing
    }
    return result;
  }

  /**
   * Parse precedence level 5
   * subexpression = "(" expression ")" | atom
   */
  private double parseSubexpression() throws ParserException
  {
    System.err.println("In subexpression...");
    
    double result = 0.0;
    
    Token.TYPE type = m_currentToken.getType();
    if ( type == Token.TYPE.OPEN_PAREN )
    {
        nextToken();

        result = parseExpression();
        System.err.println("Back in subexpression...");

        type = m_currentToken.getType();
        if (type != Token.TYPE.CLOSE_PAREN)
        {
            throw new UnbalancedParenthesesException( m_expression,
                                                      0 // Offset
                                                    );
        }

        nextToken();
    }
    else
    {
        result = parseAtom();
        System.err.println("Back in subexpression...");
    }
    
    return result;
  }

  /**
   * Parse atoms
   * atom = number
   */
  private double parseAtom() throws ParserException
  {
    System.err.println("In atom...");
    
    double result = 0.0;
    
    Token.TYPE type = m_currentToken.getType();
    switch (type)
    {
    case NUMBER:
        try
        {
          result = Double.parseDouble( m_currentToken.getValue() );
        }
        catch (NumberFormatException nfe)
        {
          throw new ParserNumberFormatException(m_currentToken,
                                                m_expression, nfe);
        }
        nextToken();
        break;
        
    default:
        throw new UnexpectedTokenFoundException( m_currentToken,
                                                 m_expression);
    }
    return result;
  }
  
  /**
   * Convenient output for Parser
   * 
   * @return a string representation for the Parser object
   */
  @Override
  public String toString()
  {
    return "Parser: " + getExpression() + "\n" +
                        getCurrentToken();
  }
  
  /**
   * Test Expression method for testing
   * 
   * @param expression
   */
  public static void testExpr(String expression)
  {
    try
    {
      double result = m_parser.parse(expression);
      System.err.println("Result of '" + m_parser.getExpression() + 
                         "' = " + result);
    }
    catch (ParserException pe)
    {
        System.err.println(">>> " + pe.getFormattedMessage());
    }
  }

  /**
   * Main entry point for testing
   * 
   * @param args the command line arguments
   */
  public static void main(String[] args)
  {
    m_parser = new Parser();
    testExpr("1+2*3/4");
    testExpr("  1 + 2 * 3");
    testExpr("3^3");
    testExpr("(1 + 2) * 3");
    testExpr("53%5");
    testExpr("72^0");
    testExpr("+5 + -6 * -(3 * 4)");
    testExpr(" X + 1");
    testExpr("11 - 6 7");
    testExpr(" 3 - +(2 + 4  ");
    testExpr("4 + 5/0");
    testExpr("10 ** 8");
    testExpr("(10-5)*9)");
    testExpr("/8");
  }
  
  //// Private data ////
  private String m_expression = "";
  private StringReader m_reader;
  private StreamTokenizer m_tokenizer;
  private Token m_currentToken = new Token(); // UNDEFINED token type
  
  private static Parser m_parser;
}
The page was last updated February 19, 2008