/*
 * (C) Copyright Keith Visco 1999  All rights reserved.
 *
 * The program is provided "as is" without any warranty express or
 * implied, including the warranty of non-infringement and the implied
 * warranties of merchantibility and fitness for a particular purpose.
 * The Copyright owner will not be liable for any damages suffered by
 * you as a result of using the Program. In no event will the Copyright
 * owner be liable for any special, indirect or consequential damages or
 * lost profits even if the Copyright owner has been advised of the
 * possibility of their occurrence.
 */
package com.kvisco.xsl.util;

import com.kvisco.util.*;
import com.kvisco.xsl.Names;
import java.util.Hashtable;

/**
 * A Lexical Analyzer of XSL patterns and expressions.
 * <p>
 * The complete input String is tokenized by the constructor; the
 * resulting tokens are kept in a List and handed out through a
 * movable cursor (see nextToken, lookAhead, pushBack, setPosition).
 * @author <a href="mailto:kvisco@ziplink.net">Keith Visco</a>
**/
public class ExprLexer {
    
    
    //-- delimiters
    public static final char FORWARD_SLASH   = '/';
    public static final char L_PAREN         = '(';
    public static final char R_PAREN         = ')';
    public static final char L_BRACKET       = '[';
    public static final char R_BRACKET       = ']';
    public static final char PERIOD          = '.';
    public static final char COMMA           = ',';
    public static final char AT_SYMBOL       = '@';
    public static final char DOLLAR_SYMBOL   = '$';
    public static final char S_QUOTE         = '\'';
    public static final char D_QUOTE         = '\"';
    public static final char VERT_BAR        = '|';
    
    //-- Expression whitespace
    public static final char SPACE           = ' ';
    public static final char TAB             = '\t';
    public static final char LF              = '\n';
    public static final char CR              = '\r';
    //-- simple operators
    public static final char EQUALS_OP       = '=';
    public static final char ADDITION_OP     = '+';
    public static final char SUBTRACTION_OP  = '-';
    public static final char LESS_THAN_OP    = '<';
    public static final char GREATER_THAN_OP = '>';
    public static final char MULTIPLY_OP     = '*';
    
    
    /**
     * The characters which end the token currently being buffered
     * (see isDelimiter)
    **/
    private static final char[] delimiters = {
        FORWARD_SLASH, VERT_BAR,
        L_PAREN,       R_PAREN, 
        L_BRACKET,     R_BRACKET,
        PERIOD,        COMMA, 
        AT_SYMBOL,     DOLLAR_SYMBOL, 
        S_QUOTE,       D_QUOTE,
        
        //-- whitespace
        SPACE, TAB, CR, LF,
        
        //-- operator tokens
        EQUALS_OP, 
        ADDITION_OP,
        SUBTRACTION_OP,
        MULTIPLY_OP,
        LESS_THAN_OP,
        GREATER_THAN_OP
    }; //-- delimiters
    
    /**
     * The set of named tokens recognized by an ExprLexer. Buffered
     * text which is equal to the value of one of these tokens is
     * reported as that token instead of as a CNAME.
    **/
    public static final Token[] tokenSet = 
    { 
        
        //-- Operators
        new Token("and", Token.AND_OPNAME),
        new Token("or",  Token.OR_OPNAME),
        new Token("mod", Token.MOD_OPNAME),
        new Token("div", Token.DIV_OPNAME),
        new Token("quo", Token.QUO_OPNAME),

        //-- NodeType tokens
        new Token("comment", Token.COMMENT),
        new Token("pi",      Token.PI),
        new Token("node",    Token.NODE),
        new Token("text",    Token.TEXT),
        
        //-- basic
        new Token("*",   Token.WILDCARD),
        
        //-- Axis Identifiers
        new Token("from-ancestors", Token.ANCESTORS_AXIS),
        new Token("from-ancestors-or-self", Token.ANCESTORS_OR_SELF),
        new Token("from-attributes", Token.ATTRIBUTES_AXIS ),
        new Token("from-children", Token.CHILDREN_AXIS),
        new Token("from-descendants", Token.DESCENDANTS_AXIS),
        new Token("from-descendants-or-self", Token.DESCENDANTS_OR_SELF),
        new Token("from-following", Token.FOLLOWING_AXIS),
        new Token("from-following-siblings", Token.FOLLOWING_SIBLINGS_AXIS),
        new Token("from-parent", Token.PARENT_AXIS),
        new Token("from-preceding", Token.PRECEDING_AXIS),
        new Token("from-preceding-siblings", Token.PRECEDING_SIBLINGS_AXIS),
        new Token("from-self", Token.SELF_AXIS),
    };
     
    /**
     * The index of the token that the next call to nextToken returns
    **/
    private  int currentIndex = 0;
    
    /**
     * The tokens produced from the pattern, in order of appearance
    **/
    List tokens = null;
    
    /**
     * The token most recently added to the token list. Starts out as
     * a token of type Token.NULL so that the parse method never has
     * to check for null.
    **/
    private Token lastToken = new Token(null, Token.NULL);
    
      //---------------/
     //- Contructors -/
    //---------------/
    
    /**
     * Creates a new ExprLexer using the given String. The String is
     * tokenized immediately.
     * @param pattern the pattern or expression to tokenize
    **/
    public ExprLexer(String pattern) {
        super();
        tokens = new List();
        parse(pattern);
    } //-- ExprLexer

    /**
     * Advances the index of this tokenizer ahead by the given offset.
     * If the resulting index is not the index of an existing token
     * (this includes a negative result), the tokenizer is positioned
     * at the end.
     * @param offset the number of tokens to move ahead
    **/
    public void advance(int offset) {
        int idx = currentIndex + offset;
        if ((idx >= 0) && (idx < tokens.size())) {
            currentIndex = idx;
        }
        else {
            currentIndex = tokens.size();
        }
    } //-- advance

    /**
     * Counts the number of times nextToken can be called without
     * returning null
     * @return the number of tokens remaining after the current position
    **/
    public int countTokens() {
        return tokens.size() - currentIndex;
    } //-- countTokens
    
    /**
     * Returns the current position of this tokenizer
     * @return the index of the token that nextToken would return
    **/
    public int getPosition() {
        return currentIndex;
    } //-- getPosition

    /**
     * Determines if there are any tokens available
     * @return true if there are tokens available, otherwise false
    **/
    public boolean hasMoreTokens() {
        return (countTokens() > 0);
    } //-- hasMoreTokens

    /**
     * Determines if the specified char is a delimiter
     * @param ch the char to compare to the delimiters
     * @return true if the char argument is a delimiter
    **/
    public boolean isDelimiter(char ch) {
        for (int i = 0; i < delimiters.length; i++) {
            if (ch == delimiters[i]) return true;
        }
        return false;
    } //-- isDelimiter
    
    /**
     * Determines if the given Token is an operator: either a binary
     * operator (see isBinaryOp), or one of the parent, ancestor or
     * union operators
     * @param token the Token to test, may be null
     * @return true if the Token is an operator, otherwise false
    **/
    public boolean isOperator(Token token) {
        if (token == null) return false;
        if (isBinaryOp(token)) return true;
        switch (token.type) {
            case Token.PARENT_OP:
            case Token.ANCESTOR_OP:
            case Token.UNION_OP:
                return true;
            default:
                return false;
        }
    } //-- isOperator

    /**
     * Determines if the given Token is an addition or subtraction
     * operator
     * @param token the Token to test, may be null
     * @return true if the Token is an additive operator
    **/
    public static boolean isAdditiveOp(Token token) {
        if (token == null) return false;
        switch (token.type) {
            case Token.ADDITION_OP:
            case Token.SUBTRACTION_OP:
                return true;
            default:
                return false;
        }
    } //-- isAdditiveOp
    
    /**
     * Determines if the given Token is one of the binary operators:
     * and, or, mod, div, quo, +, -, *, =, the not-equals operator,
     * &lt;, &lt;=, &gt; or &gt;=
     * @param token the Token to test, may be null
     * @return true if the Token is a binary operator
    **/
    public static boolean isBinaryOp(Token token) {
        if (token == null) return false;
        
        switch (token.type) {
            case Token.AND_OPNAME:
            case Token.OR_OPNAME:
            case Token.MOD_OPNAME:
            case Token.DIV_OPNAME:
            case Token.QUO_OPNAME:
            case Token.ADDITION_OP:
            case Token.SUBTRACTION_OP:
            case Token.EQUALS_OP:
            case Token.NOT_EQUALS_OP:
            case Token.MULTIPLY_OP:
            case Token.LESS_THAN_OP:
            case Token.LESS_OR_EQ_OP:
            case Token.GREATER_THAN_OP:
            case Token.GREATER_OR_EQ_OP:
                return true;
            default:
                return false;
        }
        
    } //-- isBinaryOp

    /**
     * Determines if the given Token is an equals or not-equals
     * operator
     * @param token the Token to test, may be null
     * @return true if the Token is an equality operator
    **/
    public static boolean isEqualityOp(Token token) {
        if (token == null) return false;
        switch (token.type) {
            case Token.EQUALS_OP:
            case Token.NOT_EQUALS_OP:
                return true;
            default:
                return false;
        }
    } //-- isEqualityOp

    /**
     * Determines if the given Token is one of the operators
     * &lt;, &lt;=, &gt; or &gt;=
     * @param token the Token to test, may be null
     * @return true if the Token is a relational operator
    **/
    public static boolean isRelationalOp(Token token) {
        if (token == null) return false;
        switch (token.type) {
            case Token.LESS_THAN_OP:
            case Token.LESS_OR_EQ_OP:
            case Token.GREATER_THAN_OP:
            case Token.GREATER_OR_EQ_OP:
                return true;
            default:
                return false;
        }
    } //-- isRelationalOp

    /**
     * Determines if the given Token is one of the operators
     * mod, div, quo or *
     * @param token the Token to test, may be null
     * @return true if the Token is a multiplicative operator
    **/
    public static boolean isMultiplicativeOp(Token token) {
        if (token == null) return false;
        switch (token.type) {
            case Token.MOD_OPNAME:
            case Token.DIV_OPNAME:
            case Token.QUO_OPNAME:
            case Token.MULTIPLY_OP:
                return true;
            default:
                return false;
        }
    } //-- isMultiplicativeOp
    
    /**
     * Returns true if the char argument is a digit ('0' through '9')
     * @param ch the char to test
     * @return true if the char argument is a digit
    **/
    public static boolean isDigit(char ch) {
        return ((ch >= '0') && (ch <= '9'));
    } //-- isDigit
    
    /**
     * Returns true if the char argument is a letter. Only the
     * letters 'a' through 'z' and 'A' through 'Z' are accepted.
     * @param ch the char to test
     * @return true if the char argument is a letter
    **/
    public static boolean isLetter(char ch) {
        if ((ch >= 'a' ) && (ch <= 'z' )) return true;
        if ((ch >= 'A' ) && (ch <= 'Z' )) return true;
        return false;
    } //-- isLetter
    
    /**
     * Returns true if the char argument is an NCNameChar,
     * as defined by the XML Namespaces recommendation
     * http://www.w3c.org/TR/1999/REC-xml-names-199904114
     * <p>
     * This implementation accepts the characters accepted by
     * isLetter and isDigit, plus '.', '_' and '-'.
     * @param ch the char to test
     * @return true if the char argument is an NCNameChar
    **/
    public static boolean isNCNameChar(char ch) {
        if (isLetter(ch)) return true;
        if (isDigit(ch))  return true;
        return ((ch == '.') ||(ch == '_') || (ch == '-'));
    } //-- isNCNameChar
    
    /**
     * Returns true if the char argument is an QName character,
     * as defined by the XML Namespaces recommendation
     * http://www.w3c.org/TR/1999/REC-xml-names-199904114
     * @param ch the char to test
     * @return true if the char argument is an QName character
    **/
    public static boolean isQNameChar(char ch) {
        return (isNCNameChar(ch) || (ch == ':'));
    } //-- isQNameChar
    
    /**
     * Returns true if the char argument is a space, tab, line feed
     * or carriage return
     * @param ch the char to test
     * @return true if the char argument is expression whitespace
    **/
    public static boolean isWhitespace(char ch) {
        switch(ch) {
            case SPACE:
            case TAB:
            case LF:
            case CR:
                return true;
            default:
                return false;
        }
    } //-- isWhitespace
    
    
    /**
     * Allows looking ahead for tokens without affecting the
     * token sequence as called by nextToken or previousToken
     * @param offset the distance from the current position; an
     * offset of 0 yields the token that nextToken would return
     * @return the Token at the requested position, or null if there
     * is no token at that position
    **/
    public Token lookAhead(int offset) {
        int idx = currentIndex + offset;
        if ((idx >= 0) && (idx < tokens.size())) {
            return (Token)tokens.get(idx);
        }
        return null;
    } //-- lookAhead 
    
    /**
     * Retrieves the next available token and moves the position of
     * this Lexer ahead by one
     * @return the next available token or null if there are none
    **/
    public Token nextToken() {
        if (currentIndex < tokens.size()) {
            return (Token) tokens.get(currentIndex++);
        }
        return null;
    } //-- nextToken

    /**
     * Moves the position of this Lexer back one. Nothing happens
     * if the Lexer is already positioned at the beginning.
    **/
    public void pushBack() {
        if (currentIndex > 0) --currentIndex;
    } //-- pushBack
    
    /**
     * Resets the position of the token pointer to the beginning
    **/
    public void resetPosition() {
        currentIndex = 0;
    } //-- resetPosition
    
    /**
     * Sets the current position of this tokenizer. 
     * @param position the index value to set the current position to.
     * if position is greater than the number of tokens, the tokenizer
     * is advanced to the end. If the given position is less than 0,
     * the tokenizer is positioned at the beginning
    **/
    public void setPosition(int position) {
        if (position < 0) {
            currentIndex = 0;
        }
        else if (position < tokens.size()) {
            currentIndex = position;
        }
        else {
            currentIndex = tokens.size();
        }
    } //-- setPosition
    
    /**
     * Returns the concatenation of the String representations of
     * all tokens of this Lexer, regardless of the current position
     * @return the String representation of the token list
    **/
    public String toString() {
        StringBuffer sb = new StringBuffer();
        for (int i = 0; i < tokens.size(); i++) {
            sb.append(tokens.get(i));
        }
        return sb.toString();
    } //-- toString
    
      //-------------------/
     //- Private Methods -/
    //-------------------/

    /**
     * Appends the given Token to the token list and remembers it as
     * the last token added
    **/
    private void addToken(Token token) {
        tokens.add(token);
        lastToken = token;
    } //-- addToken
    
    /**
     * Creates the Token for a single delimiter character
     * @param ch the delimiter character
     * @return a new Token for the delimiter, or null for characters
     * without a token of their own (whitespace, quotes, '$' and all
     * non delimiter characters)
    **/
    private Token delimiterToken(char ch) {
        
        switch(ch) {
            case FORWARD_SLASH:
                return new Token(null, Token.PARENT_OP);
            case VERT_BAR:
                return new Token(null, Token.UNION_OP);
            case L_PAREN:
                return new Token(null, Token.L_PAREN);
            case R_PAREN:
                return new Token(null, Token.R_PAREN);
            case L_BRACKET:
                return new Token(null, Token.L_BRACKET);
            case R_BRACKET:
                return new Token(null, Token.R_BRACKET);
            case PERIOD:
                return new Token(null, Token.PERIOD);
            case COMMA:
                return new Token(null, Token.COMMA);
            case AT_SYMBOL:
                return new Token(null, Token.AT_SYMBOL);
            case EQUALS_OP:
                return new Token(null, Token.EQUALS_OP);
            case ADDITION_OP:
                return new Token(null, Token.ADDITION_OP);
            case SUBTRACTION_OP:
                return new Token(null, Token.SUBTRACTION_OP);
            case MULTIPLY_OP:
                return new Token(null, Token.MULTIPLY_OP);
            case LESS_THAN_OP:
                return new Token(null, Token.LESS_THAN_OP);
            case GREATER_THAN_OP:
                return new Token(null, Token.GREATER_THAN_OP);

            default:
                break;
        }
        return null;
    } //-- delimiterToken
    
    /**
     * Turns the contents of the given buffer into a token and empties
     * the buffer. The text becomes the matching token of the tokenSet
     * if there is one, otherwise a CNAME token. An empty buffer is
     * left alone and no token is added.
     * @param buffer the buffer holding the text collected so far
     * @param ch the character which caused the buffer to be freed
    **/
    private void freeBuffer(StringBuffer buffer, char ch) {
        if (buffer.length() > 0) {
            String tokenString = buffer.toString();
            Token tok = match(tokenString, ch);
            if (tok != null) addToken(tok);
            else addToken( new Token(tokenString, Token.CNAME));
            buffer.setLength(0);
        }
    } //-- freeBuffer

    /**
     * Parses the given String into tokens and adds them into
     * the tokens List.
     * <p>
     * The characters are processed one at a time. Characters which
     * belong to a name are collected in a buffer which is turned
     * into a token as soon as a delimiter ends it. Literals, numbers
     * and variable references are collected in dedicated modes.
     * Parsing stops early, after adding an ERROR token, when "///"
     * or "..." is encountered. An unterminated literal also results
     * in an ERROR token.
     * @param pattern the pattern or expression to tokenize
    **/
    private void parse(String pattern) {
        
        boolean inLiteral = false;
        boolean inNumber  = false;
        boolean inVarRef  = false;
        
        StringBuffer tokBuf = new StringBuffer();
        
        char ch     = '\0';
        
        char[] chars = pattern.toCharArray();
        
        //-- the quote character which ends the current literal
        char endQuote = S_QUOTE;
        
        int currentIdx = 0;
        
        //-- NOTE(review): '!' is not a delimiter and nothing in this
        //-- method creates a Token.NOT_EQUALS_OP, so "!=" is not
        //-- recognized here; confirm whether it is handled elsewhere
        
        while (currentIdx < chars.length) {
            
            char prevCh = ch;
            ch = chars[currentIdx++];
            
            //-- handle literals
            if (inLiteral) {
                //-- only the quote which opened the literal ends it,
                //-- the other quote character is part of the literal
                if (ch == endQuote) {
                    inLiteral = false;
                    addToken(new Token(tokBuf.toString(), Token.LITERAL));
                    tokBuf.setLength(0);
                }
                else {
                    tokBuf.append(ch);
                }
            }
            //-- handle current number
            else if (inNumber) {
                if (isDigit(ch) || (ch == '.')) {
                    tokBuf.append(ch);
                }
                else {
                    inNumber = false;
                    addToken(new Token(tokBuf.toString(), Token.NUMBER));
                    tokBuf.setLength(0);
                    //-- the current char is not part of the number,
                    //-- process it again as a regular character
                    --currentIdx;
                }
            }
            //-- handle current variable reference
            else if (inVarRef) {
                
                if (ch == '-') {
                    //-- lookAhead, a '-' is only part of the name if
                    //-- it is followed by another name character
                    char next = '\0';
                    if (currentIdx < chars.length) next = chars[currentIdx];
                    if (!isNCNameChar(next)) inVarRef = false;
                    else tokBuf.append(ch);
                }
                else if (isNCNameChar(ch)) tokBuf.append(ch);
                else inVarRef = false;
                
                //-- clean up buffer
                if (!inVarRef) {
                    addToken(new Token(tokBuf.toString(), 
                        Token.VAR_REFERENCE));
                    tokBuf.setLength(0);
                    //-- process the terminating char again
                    --currentIdx;
                }
            }
            else if (isDelimiter(ch)) {
                
                Token tok = null;
                
                switch (ch) {
                    
                    //-- ignore whitespace
                    case SPACE:
                    case TAB:
                    case CR:
                    case LF:
                        freeBuffer(tokBuf, ch);
                        break;
                    case L_PAREN:
                        freeBuffer(tokBuf, ch);
                        //-- a name followed by '(' is a function name
                        if (lastToken.type == Token.CNAME) {
                            lastToken.type = Token.FUNCTION_NAME;
                        }
                        addToken(new Token(null, Token.L_PAREN));
                        break;
                    case R_PAREN:
                        freeBuffer(tokBuf, ch);
                        addToken(new Token(null, Token.R_PAREN));
                        break;
                    case S_QUOTE:
                    case D_QUOTE:
                        freeBuffer(tokBuf, ch);
                        inLiteral = true;
                        endQuote = ch;
                        break;
                    case FORWARD_SLASH: 
                        freeBuffer(tokBuf, ch);
                        switch(lastToken.type) {
                            //-- second '/', turn "/" into "//"
                            case Token.PARENT_OP:
                                lastToken.type = Token.ANCESTOR_OP;
                                break;
                            //-- third '/', not allowed
                            case Token.ANCESTOR_OP:
                                addToken(new Token(null, Token.ERROR));
                                return;
                            default:
                                addToken(new Token(null, Token.PARENT_OP));
                                break;
                        }
                        break;
                    case PERIOD: 
                        //-- a period within buffered text is part of
                        //-- that text
                        if (tokBuf.length() > 0)
                            tokBuf.append(ch);
                        else {
                            switch (lastToken.type) {
                                //-- second '.', turn "." into ".."
                                case Token.SELF_NODE:
                                    lastToken.type = Token.PARENT_NODE;
                                    break;
                                //-- third '.', not allowed
                                case Token.PARENT_NODE:
                                    addToken(new Token(null, Token.ERROR));
                                    return;
                                default:
                                    addToken(new Token(null, Token.SELF_NODE));
                                    break;
                            }
                        }
                        break;
                    case DOLLAR_SYMBOL:
                        freeBuffer(tokBuf, ch);
                        inVarRef = true;
                        break;
                    case AT_SYMBOL:
                        freeBuffer(tokBuf, ch);
                        addToken(new Token("@", Token.AT_SYMBOL));
                        break;
                    case EQUALS_OP:
                        //-- An '=' which follows a '<' or '>' token,
                        //-- with nothing buffered in between, completes
                        //-- a "<=" or ">=" operator. The '=' is consumed
                        //-- here: letting it reach the operator handling
                        //-- below would append it to the buffer, since
                        //-- the last token is an operator. If text is
                        //-- buffered, the last token does not directly
                        //-- precede the '=' and must not be changed.
                        if (tokBuf.length() == 0) {
                            if (lastToken.type == Token.LESS_THAN_OP) {
                                lastToken.type = Token.LESS_OR_EQ_OP;
                                break;
                            }
                            if (lastToken.type == Token.GREATER_THAN_OP) {
                                lastToken.type = Token.GREATER_OR_EQ_OP;
                                break;
                            }
                        }
                        //-- important no break here
                    case MULTIPLY_OP:
                    case ADDITION_OP:
                    case SUBTRACTION_OP:
                    case LESS_THAN_OP:
                    case GREATER_THAN_OP:
                    
                        //-- Buffered text is ended by every operator
                        //-- character except '-', which may be part
                        //-- of a name
                        if ((tokBuf.length() > 0) && (ch != '-' )) {
                            freeBuffer(tokBuf, ch);
                            addToken( delimiterToken(ch) );
                            break;
                        }
                        
                        //-- Check for operators. An Operator token can
                        //-- only be matched if there is a preceding token,
                        //-- and that token is not @,(,[, or an Operator
                        if (!isOperator(lastToken)) {
                            switch (lastToken.type) {
                                case Token.AT_SYMBOL:
                                case Token.L_PAREN:
                                case Token.L_BRACKET:
                                case Token.COMMA:
                                case Token.NULL:
                                    tokBuf.append(ch);
                                    break;
                                default:
                                    freeBuffer(tokBuf, ch);
                                    addToken( delimiterToken(ch) );
                                    break;
                            }
                        }
                        else {
                            tokBuf.append(ch);
                        }
                        break;
                    default:
                        freeBuffer(tokBuf, ch);
                        //-- add delimiter Token
                        tok = delimiterToken(ch);
                        if (tok != null) addToken(tok);
                        break;
                }
            }
            //-- check for number
            else if (isDigit(ch)) {
                //-- a number starts with a digit at the beginning of
                //-- a token, or with a digit directly after a
                //-- buffered '-'
                if ((tokBuf.length() == 0) || 
                    ((tokBuf.length() == 1) && (prevCh == '-' )))
                {
                    inNumber = true;
                    tokBuf.append(ch);
                }
                else {
                    //-- buffered text which is an operator name ends
                    //-- here and the digit starts a number, otherwise
                    //-- the digit is part of the buffered text
                    Token tok = match(tokBuf.toString(), ch);
                    if ((tok != null) && isOperator(tok)) {
                        addToken(tok);
                        tokBuf.setLength(0);
                        tokBuf.append(ch);
                        inNumber = true;
                    }
                    else tokBuf.append(ch);
                }
            }
            else {
                tokBuf.append(ch);
            }
        } //-- end checking characters
        
        //-- create the token for whatever is left in the buffer
        if (inNumber)
            addToken(new Token(tokBuf.toString(), Token.NUMBER));
        else if (inLiteral)
            addToken(new Token(tokBuf.toString(), Token.ERROR));
        else if (inVarRef) 
            addToken(new Token(tokBuf.toString(), Token.VAR_REFERENCE));
        else 
            freeBuffer(tokBuf, ch);
            
    } //-- parse

    /**
     * Looks up the token of the tokenSet whose value is equal to
     * the given String
     * @param str the String to look up
     * @param ch the character following the String (currently not
     * used)
     * @return the matching Token of the tokenSet (the instance held
     * by the tokenSet, not a copy), a new Token of type Token.NULL
     * if the String is null, or null if there is no match
    **/
    private Token match(String str, char ch) {
        
        if (str == null) return new Token(null, Token.NULL);
        for (int i = 0; i < tokenSet.length; i++) {
            Token token = tokenSet[i];
            if (token.value.equals(str)) return token;
        }
        return null;
    } //-- match
        
    /**
     * For Debugging: tokenizes a sample expression and prints each
     * token together with its type
     * @param args the command line arguments (not used)
    **/
    public static void main(String[] args) {

        String pattern = "position() mod 2 = 0";
        
        System.out.println("Expr: "+pattern);
        ExprLexer exprLexer = new ExprLexer(pattern);
        while(exprLexer.hasMoreTokens()) {
            Token tok = exprLexer.nextToken();
            
            String ts = tok.toString();
            System.out.print("TOKEN: "+ts);
            
            //-- pad the token text to a width of 20 characters
            for (int i = ts.length(); i < 20; i++) System.out.print(" ");
            
            System.out.println(" type: " + tok.type);
        }
    } //-- main
    
    
} //-- ExprLexer

