// QWeb - An SGML Web Browser
// Copyright (C) 1997  Sean Vyain
// svyain@mail.tds.net
// smvyain@softart.com
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
extern "C" {
#include <stdio.h>
#include <ctype.h>
}
#include "ConsoleWindow.h"
#include "DtdManager.h"
#include "SgmlParser.h"
#include "StyleSheet.h"

// Pending "start tag" operation queued by SgmlParser::openTag().
//
// Carries its own copy of the start tag plus the content model state pointer
// handed in by the caller (openTag() passes the state of the element that was
// on top of the stack, or 0 when the stack was empty); rollback() writes that
// pointer back into the stack top.
struct OpStag : public SgmlParser::Op {
    STag               stag;
    ContentModelState* state;

    OpStag( STag& _stag, ContentModelState* _state )
            : SgmlParser::Op( SgmlParser::Op::Stag ),
              stag( _stag ),
              state( _state )
    {
    }
};

// Pending "end tag" operation queued by SgmlParser::closeTags().
//
// Records the name of the element being closed and the stack entry that was
// taken off the parser stack for it; commit() deletes that entry, rollback()
// appends it to the stack again.
struct OpEtag : public SgmlParser::Op {
    QString             etag;
    SgmlParser::State*  state;

    OpEtag( QString _etag, SgmlParser::State* _state )
            : SgmlParser::Op( SgmlParser::Op::Etag ),
              etag( _etag ),
              state( _state )
    {
    }
};

// Copy constructor: copies the tag name via QString::copy() and clones every
// attribute of the source tag into this tag's own list.
//
// The source's attribute list is walked with a separate QListIterator instead
// of attrs.first()/next(), so copying a tag no longer moves the source list's
// internal current-item cursor (find() iterates the same way).
STag::STag( STag& src )
        : name( src.name.copy() )
{
    QListIterator<Attr> it( src.attrs );
    for ( it.toFirst(); it.current(); ++it ) {
        attrs.append( new Attr( *it.current() ) );
    }
}

// Destructor: deletes every attribute object held in the list and unlinks it.
STag::~STag()
{
    for ( Attr* a = attrs.first(); a; a = attrs.first() ) {
        // first() made this entry the current item, so remove() unlinks it.
        delete a;
        attrs.remove();
    }
}

// Looks up an attribute by name.
//
// Each stored attribute name is upper-cased before it is compared with
// 'attr', so 'attr' itself has to be given in upper case to match.
//
// Returns a pointer to the value of the first matching attribute, or 0 when
// no attribute matches.
const QString* STag::find( QString attr ) const
{
    QListIterator<Attr> it( attrs );
    it.toFirst();
    while ( it.current() ) {
        Attr* candidate = it.current();
        if ( candidate->name.upper() == attr ) {
            return &candidate->value;
        }
        ++it;
    }
    return 0;
}

void STag::printAttrs()
{
    printf( "STag::printAttrs() -- tag = '%s'\n", name.data() );
    for ( attrs.first(); attrs.current(); attrs.next() ) {
        printf( "STag::printAttrs() -- attr = '%s', value = '%s'\n", attrs.current()->name.data(), attrs.current()->value.data() );
    }
}

// Constructs a parser in ContentState with no DTD and no style sheet, creates
// its lexer and wires the two objects together.
SgmlParser::SgmlParser()
        : _state( ContentState ),
          _dtd( 0 ),
          _styleSheet( 0 ),
          _lexer( new SgmlLexer ),
          _endOfData( FALSE )
{
    // Parser -> lexer: raw document bytes and the end-of-data notification.
    connect( this, SIGNAL( fwdData( const char*, int ) ), _lexer, SLOT( data( const char*, int ) ) );
    connect( this, SIGNAL( fwdEndOfData() ), _lexer, SLOT( endOfData() ) );

    // Lexer -> parser: the token stream and the lexer's completion signal.
    connect( _lexer, SIGNAL( token( SgmlLexer::Token, const char* ) ), this, SLOT( token( SgmlLexer::Token, const char* ) ) );
    connect( _lexer, SIGNAL( done() ), this, SLOT( lexerDone() ) );
}

// Destructor: emits done() and then deletes every object still held in the
// parser stack, the pending operation queue, the tag stack and the style
// stack.
SgmlParser::~SgmlParser()
{
    emit done();

    // In each loop first() makes the head the current item, so remove()
    // unlinks exactly the entry that was just deleted.
    for ( State* s = _stack.first(); s; s = _stack.first() ) {
        delete s;
        _stack.remove();
    }

    for ( Op* op = _ops.first(); op; op = _ops.first() ) {
        delete op;
        _ops.remove();
    }

    for ( STag* t = _tagStack.first(); t; t = _tagStack.first() ) {
        delete t;
        _tagStack.remove();
    }

    while ( _styleStack.first() ) {
        delete _styleStack.first();
        _styleStack.remove();
    }
}

// Supplies the DTD for this document.  A null pointer is ignored.
//
// The DTD is stored and handed to the lexer.  If the style sheet is already
// present as well, the bytes buffered so far are forwarded to the lexer, the
// buffer is emptied, and the end-of-data notification is forwarded too when
// it has already been received.
void SgmlParser::dtd( Dtd* dtd )
{
    if ( !dtd ) {
        return;
    }

    _dtd = dtd;
    _lexer->dtd( dtd );

    if ( !_styleSheet ) {
        // Still waiting for the style sheet; keep buffering.
        return;
    }

    emit fwdData( _leftover, _leftover.length() );
    _leftover.truncate( 0 );
    if ( _endOfData ) {
        emit fwdEndOfData();
    }
}

// Supplies the style sheet for this document.  A null pointer is ignored.
//
// If the DTD is already present as well, the bytes buffered so far are
// forwarded to the lexer, the buffer is emptied, and the end-of-data
// notification is forwarded too when it has already been received.
void SgmlParser::styleSheet( StyleSheet* styleSheet )
{
    if ( !styleSheet ) {
        return;
    }

    _styleSheet = styleSheet;

    if ( !_dtd ) {
        // Still waiting for the DTD; keep buffering.
        return;
    }

    emit fwdData( _leftover, _leftover.length() );
    _leftover.truncate( 0 );
    if ( _endOfData ) {
        emit fwdEndOfData();
    }
}

// Slot connected to the lexer's done() signal.
//
// If elements are still open, tries to close the whole document instance by
// applying an end tag for the DTD's start element; when that fails the
// overlapping-element hack is tried, and when that fails too a warning is
// logged.  The parser then deletes itself.
void SgmlParser::lexerDone()
{
    if ( _stack.count() > 0 ) {
        const bool closedCleanly = doEndTag( _dtd->start()->name(), _stack.count() );
        if ( closedCleanly ) {
            // Commit changes.
            commit();
        } else {
            // Roll back changes.
            rollback();

            // Try end tag hack to cope with overlapping elements.
            const bool closedByHack = doEndTagHack( _dtd->start()->name() );
            if ( closedByHack ) {
                commit();
            } else {
                rollback();
                console->warning( "SgmlParser::lexerDone() -- parse error, cannot close document instance" );
                logStack();
            }
        }
    }

    // Nothing may touch this object after this point.
    delete this;
}

// Whitespace test that is safe for any byte value.  isspace() is only defined
// for arguments representable as unsigned char (or EOF); document bytes with
// the high bit set would otherwise reach it as negative values on platforms
// where plain char is signed.
static inline bool sgmlIsSpace( char c )
{
    return isspace( (unsigned char)c ) != 0;
}

// Receives a chunk of raw document bytes.
//
// Once both the DTD and the style sheet are available the bytes are forwarded
// straight to the lexer.  Until then they are appended to _leftover, and -- as
// long as no DTD name has been chosen yet -- the buffered text is scanned for
// a leading
//
//     <!DOCTYPE type PUBLIC "name" ... >
//
// declaration.  If a complete declaration is found it is removed from the
// buffer and its type/name are used; if the buffered text is determined not
// to start with such a declaration the HTML 3.2 DTD is used instead.  In both
// cases dtdSelected() is emitted and the DTD and style sheet are requested
// from their managers.  When the buffer ends before a decision can be made
// the function returns and waits for the next chunk.
//
// NOTE(review): a chunk boundary that falls inside "<!", "DOCTYPE" or
// "PUBLIC" makes the comparison fail and selects the HTML default rather than
// waiting for more data -- confirm whether that is intended.
void SgmlParser::data( const char* bytes, int length )
{
    if ( ( _dtd ) && ( _styleSheet ) ) {
        emit fwdData( bytes, length );
        return;
    }

    // Buffer the bytes one at a time ('bytes' need not be NUL terminated).
    for ( int i = 0; i < length; i++ ) {
        _leftover += bytes[i];
    }

    if ( _dtdName.length() != 0 ) {
        // DTD already chosen; just keep buffering until it arrives.
        return;
    }

    // Search for <!DOCTYPE...
    int len = _leftover.length();
    int idx1;

    for ( idx1 = 0; ( idx1 < len ) && sgmlIsSpace( _leftover[idx1] ); idx1++ );

    if ( idx1 >= len ) {
        // Need more data.
        return;
    }

    if ( ( _leftover[idx1++] == '<' ) && ( _leftover[idx1++] == '!' ) ) {
        // Look for DOCTYPE keyword.
        for ( ; ( idx1 < len ) && sgmlIsSpace( _leftover[idx1] ); idx1++ );

        if ( idx1 >= len ) {
            // Need more data.
            return;
        }

        if ( _leftover.mid( idx1, 7 ).upper() == "DOCTYPE" ) {
            for ( idx1 = idx1 + 7; ( idx1 < len ) && sgmlIsSpace( _leftover[idx1] ); idx1++ );

            if ( idx1 >= len ) {
                // Need more data.
                return;
            }

            // Collect the document type name (the word after DOCTYPE).
            _dtdType = "";
            for ( ; ( idx1 < len ) && ( !sgmlIsSpace( _leftover[idx1] ) ); idx1++ ) {
                _dtdType += _leftover[idx1];
            }
            for ( ; ( idx1 < len ) && sgmlIsSpace( _leftover[idx1] ); idx1++ );

            if ( idx1 >= len ) {
                // Need more data.
                return;
            }

            if ( _leftover.mid( idx1, 6 ).upper() == "PUBLIC" ) {
                for ( idx1 = idx1 + 6; ( idx1 < len ) && sgmlIsSpace( _leftover[idx1] ); idx1++ );

                if ( idx1 >= len ) {
                    // Need more data.
                    return;
                }

                // Get the public identifier: the text between the opening
                // delimiter character and its next occurrence.
                char delim = _leftover[idx1];
                int idx2 = _leftover.find( delim, idx1 + 1 );
                if ( idx2 < 0 ) {
                    // Need more data.
                    return;
                }

                QString tmp = _leftover.mid( idx1 + 1, idx2 - idx1 - 1 );

                // The declaration ends at the next '>'.
                idx1 = _leftover.find( '>', idx2 );
                if ( idx1 < 0 ) {
                    // Need more data.
                    return;
                }

                // Drop the declaration from the buffered data.
                _leftover.remove( 0, idx1 + 1 );
                _dtdName = tmp;
                console->debug( "SgmlParser::data() -- dtd = '%s'", _dtdName.data() );
                emit dtdSelected( _dtdName );
                dtdManager->request( this, _dtdType, _dtdName );
                styleSheetManager->request( this, _dtdType, _dtdName );
                return;
            }
        }
    }

    // Default to HTML DTD.
    _dtdType = "HTML";
    _dtdName = "-//W3C//DTD HTML 3.2//EN";
    console->debug( "SgmlParser::data() -- dtd = '%s'", _dtdName.data() );
    emit dtdSelected( _dtdName );
    dtdManager->request( this, _dtdType, _dtdName );
    styleSheetManager->request( this, _dtdType, _dtdName );
}

// Records that the input is complete.  The notification is passed on to the
// lexer right away only when both the DTD and the style sheet are present;
// otherwise dtd()/styleSheet() forward it later based on the recorded flag.
void SgmlParser::endOfData()
{
    _endOfData = TRUE;

    if ( !_dtd || !_styleSheet ) {
        return;
    }
    emit fwdEndOfData();
}

// Slot receiving one token from the lexer; drives the parser state machine.
//
//   ContentState    -- dispatches on the token: start/end tag open and markup
//                      declaration open switch the lexer to markup mode;
//                      content is passed to doContent().
//   StartTag        -- 'text' is the element name; resets the tag being built.
//   AttrName        -- expects an attribute name, or the closing '>' which
//                      applies the start tag.
//   AttrEqual       -- expects '=', another attribute name (the previous
//                      attribute is stored with an empty value), or '>'.
//   AttrValue       -- expects the attribute value.
//   EndTag          -- 'text' is the element name.
//   EndTagEat       -- skips tokens until '>' and then applies the end tag.
//   MarkupDeclState -- skips tokens until '>'.
//
// Start and end tags are applied tentatively and then either committed or
// rolled back; failures are reported through the console together with the
// current tag stack.
//
// The two "ignoring token" warnings used to pass only 'text' for a
// "%d ... %s" format string; the token value is now passed as well.
void SgmlParser::token( SgmlLexer::Token token, const char* text )
{
    switch ( _state ) {
        case ContentState:
            switch ( token ) {
                case SgmlLexer::StartTagOpen:
                    _state = StartTag;
                    _lexer->mode( SgmlLexer::MarkupMode );
                    break;

                case SgmlLexer::EndTagOpen:
                    _state = EndTag;
                    _lexer->mode( SgmlLexer::MarkupMode );
                    break;

                case SgmlLexer::Content:
                    // Rejected content is only reported when it is more than
                    // whitespace.
                    if ( ( !doContent( text ) ) && ( QString( text ).stripWhiteSpace().length() > 0 ) ) {
                        console->warning( "SgmlParser::token() -- parse error, unexpected content = '%s' (ignoring)", text );
                        logStack();
                    }
                    break;

                case SgmlLexer::MarkupDeclOpen:
                    _state = MarkupDeclState;
                    _lexer->mode( SgmlLexer::MarkupMode );
                    break;

                default:
                    console->warning( "SgmlParser::token() -- ignoring token = %d, text = '%s'", int( token ), text );
                    logStack();
                    break;
            }
            break;

        case StartTag:
            // Begin a fresh start tag: remember the upper-cased name and drop
            // any attributes left over from the previous tag.
            _attrName = "";
            _attrValue = "";
            _tag = QString( text ).upper();
            _startTag.name = _tag.copy();
            while ( _startTag.attrs.first() ) {
                delete _startTag.attrs.first();
                _startTag.attrs.remove();
            }
            _state = AttrName;
            break;

        case AttrName:
            if ( token == SgmlLexer::MarkupClose ) {
                if ( ( _dtd->getElement( _startTag.name ) ) && ( doStartTag( _startTag, _stack.count() ) ) ) {
                    // Commit changes.
                    commit();
                } else {
                    // Roll back changes.
                    rollback();
                    console->warning( "SgmlParser::token() -- parse error, unexpected start tag = '<%s>' (ignoring)", _tag.data() );
                    logStack();
                }
                _state = ContentState;
                _lexer->mode( SgmlLexer::PcdataMode );
            } else /*if ( token == SgmlLexer::Identifier )*/ {
                _attrName = text;
                _attrValue = "";
                _state = AttrEqual;
            }
            break;

        case AttrEqual:
            if ( token == SgmlLexer::MarkupClose ) {
                // The tag ends right after an attribute name: store the
                // attribute with an empty value, then apply the tag.
                _startTag.attrs.append( new STag::Attr( _attrName, _attrValue ) );
                if ( ( _dtd->getElement( _startTag.name ) ) && ( doStartTag( _startTag, _stack.count() ) ) ) {
                    // Commit changes.
                    commit();
                } else {
                    // Roll back changes.
                    rollback();
                    console->warning( "SgmlParser::token() -- parse error, unexpected start tag = '<%s>' (ignoring)", _tag.data() );
                    logStack();
                }
                _state = ContentState;
                _lexer->mode( SgmlLexer::PcdataMode );
            } else if ( token == SgmlLexer::EqualSign ) {
                _state = AttrValue;
            } else if ( token == SgmlLexer::Identifier ) {
                // Another name follows directly: the previous attribute had
                // no value.
                _startTag.attrs.append( new STag::Attr( _attrName, _attrValue ) );
                _attrName = text;
                _attrValue = "";
            }
            break;

        case AttrValue:
            // NOTE(review): any other token (including MarkupClose) is
            // ignored here and the state is left unchanged -- confirm the
            // lexer cannot deliver '>' directly after '='.
            if ( ( token == SgmlLexer::Identifier ) || ( token == SgmlLexer::StringLiteral ) || ( token == SgmlLexer::Integer ) ) {
                _attrValue = text;
                _startTag.attrs.append( new STag::Attr( _attrName, _attrValue ) );
                _attrName = "";
                _attrValue = "";
                _state = AttrName;
            }
            break;

        case EndTag:
        case EndTagEat:
            if ( _state == EndTag ) {
                _tag = QString( text ).upper();
                _state = EndTagEat;
            } else if ( token == SgmlLexer::MarkupClose ) {
                if ( ( _dtd->getElement( _tag ) ) && ( doEndTag( _tag, _stack.count() ) ) ) {
                    // Commit changes.
                    commit();
                } else {
                    // Roll back changes.
                    rollback();

                    // Try end tag hack to cope with overlapping elements.
                    if ( ( _dtd->getElement( _tag ) ) && ( doEndTagHack( _tag ) ) ) {
                        commit();
                    } else {
                        rollback();
                        console->warning( "SgmlParser::token() -- parse error, unexpected end tag = '</%s>' (ignoring)", _tag.data() );
                        logStack();
                    }
                }
                _state = ContentState;
                _lexer->mode( SgmlLexer::PcdataMode );
            }
            break;

        case MarkupDeclState:
            if ( token == SgmlLexer::MarkupClose ) {
                _state = ContentState;
                _lexer->mode( SgmlLexer::PcdataMode );
            }
            break;

        default:
            console->warning( "SgmlParser::token() -- ignoring token = %d, text = '%s'", int( token ), text );
            logStack();
            break;
    }
}

// Tentatively applies a start tag as seen from stack depth 'depth'.
//
// The steps below are tried in order; the first one that applies decides the
// result:
//   1. unknown element                          -> warn, FALSE
//   2. element excluded at this depth           -> retry one level up if the
//                                                  element at 'depth' is
//                                                  satisfied and its end tag
//                                                  is optional, else FALSE
//   3. fits the content model, or is included   -> close deeper tags, open it
//   4. is the DTD's start element               -> open it
//   5. an optional start tag could be inserted  -> retry from the stack top
//   6. element at 'depth' can be closed         -> retry one level up
//   7. otherwise                                -> FALSE
//
// All changes are queued as operations; the caller must commit() or
// rollback() afterwards.
bool SgmlParser::doStartTag( STag& tag, int depth )
{
    if ( !_dtd->getElement( tag.name ) ) {
        console->warning( "SgmlParser::doStartTag() -- unrecognized element '%s'", tag.name.data() );
        logStack();
        return FALSE;
    }

    if ( excluded( tag.name, depth ) ) {
        if ( ( depth > 0 ) && satisfied( depth ) && etagOptional( depth ) ) {
            return doStartTag( tag, depth - 1 );
        }
        // Parse error!
        return FALSE;
    }

    if ( fitsModel( tag.name, depth ) || included( tag.name, depth ) ) {
        // Close off end tags that are past the current depth, then open.
        closeTags( depth );
        openTag( tag );
        return TRUE;
    }

    if ( tag.name == _dtd->start()->name() ) {
        openTag( tag );
        return TRUE;
    }

    if ( tryOptional( depth ) ) {
        return doStartTag( tag, _stack.count() );
    }

    if ( ( depth > 0 ) && satisfied( depth ) && etagOptional( depth ) ) {
        return doStartTag( tag, depth - 1 );
    }

    // Parse error!
    return FALSE;
}

// Tentatively applies an end tag for element 'tag' as seen from stack depth
// 'depth'.
//
//   - If the element at 'depth' is the named one and its content model is
//     satisfied, it (and everything above it) is closed.
//   - Otherwise, if the element at 'depth' is satisfied and its end tag is
//     optional, it is closed and the search continues one level up.
//   - Otherwise an optional start tag is tried, and on success the search
//     restarts from the top of the stack.
//
// Returns TRUE when the end tag could be applied.  All changes are queued as
// operations; the caller must commit() or rollback() afterwards.
//
// The "unrecognized element" warning used to name doStartTag() as its
// origin; it now names this function.
bool SgmlParser::doEndTag( QString tag, int depth )
{
    if ( !_dtd->getElement( tag ) ) {
        console->warning( "SgmlParser::doEndTag() -- unrecognized element '%s'", tag.data() );
        logStack();
        return FALSE;
    }

    if ( ( depth > 0 ) && ( tag == _stack.at( depth - 1 )->element->name() ) && satisfied( depth ) ) {
        closeTags( depth - 1 );
        return TRUE;
    } else if ( ( depth > 0 ) && satisfied( depth ) && etagOptional( depth ) ) {
        closeTags( depth - 1 );
        return doEndTag( tag, depth - 1 );
    } else if ( tryOptional( depth ) ) {
        return doEndTag( tag, _stack.count() );
    }

    return FALSE;
}

// Fallback for overlapping elements: searches the stack from the top down
// for an element named 'tag' and, if one is found, closes it together with
// everything above it, without consulting the content models.
//
// Returns TRUE when a matching element was found.
bool SgmlParser::doEndTagHack( QString tag )
{
    State* s = _stack.last();
    while ( s ) {
        if ( tag == s->element->name() ) {
            // at() is the index of the match, so the match itself is closed
            // as well.
            closeTags( _stack.at() );
            return TRUE;
        }
        s = _stack.prev();
    }

    return FALSE;
}

// Handles a run of character data.
//
// Returns FALSE when no element is open.  Otherwise every element on top of
// the stack that reports isEmpty() is closed first; if one of them cannot be
// closed the pending changes are rolled back and FALSE is returned.  On
// success the changes are committed, content() is emitted and TRUE is
// returned.
bool SgmlParser::doContent( QString text )
{
    if ( _stack.count() == 0 ) {
        return FALSE;
    }

    for ( State* top = _stack.last(); top && top->element->isEmpty(); top = _stack.last() ) {
        if ( !doEndTag( top->element->name(), _stack.count() ) ) {
            rollback();
            return FALSE;
        }
    }

    commit();

    console->debug( "SgmlParser::doContent() -- text = '%s'", text.data() );

    emit content( text );
    return TRUE;
}

// Returns TRUE when the element at stack depth 'depth', or any element below
// it on the stack, reports 'tag' as excluded.  A depth below 1 yields FALSE.
bool SgmlParser::excluded( QString tag, int depth )
{
    if ( depth >= 1 ) {
        State* s = _stack.at( depth - 1 );
        while ( s ) {
            if ( s->element->excluded( tag ) ) {
                return TRUE;
            }
            s = _stack.prev();
        }
    }

    return FALSE;
}

// Returns TRUE when the element at stack depth 'depth', or any element below
// it on the stack, reports 'tag' as included.  A depth below 1 yields FALSE.
bool SgmlParser::included( QString tag, int depth )
{
    if ( depth >= 1 ) {
        State* s = _stack.at( depth - 1 );
        while ( s ) {
            if ( s->element->included( tag ) ) {
                return TRUE;
            }
            s = _stack.prev();
        }
    }

    return FALSE;
}

bool SgmlParser::etagOptional( int depth )
{
    if ( depth < 1 ) return FALSE;
    
    return !_stack.at( depth - 1 )->element->etagRequired();
}

// Tries to insert a start tag that the document left out.
//
// depth >= 1: asks the content model state at 'depth' for its default
//             transition; if that names a known element whose start tag is
//             not required, the element is started via doStartTag().
// depth <  1: if nothing is open and the DTD's start element does not
//             require a start tag, that element is opened.
//
// Returns TRUE when a tag was inserted.
bool SgmlParser::tryOptional( int depth )
{
    if ( depth >= 1 ) {
        QString next;
        if ( !_stack.at( depth - 1 )->state->defaultTrans( next ) ) {
            return FALSE;
        }

        SgmlElement* e = _dtd->getElement( next );
        if ( ( !e ) || ( e->stagRequired() ) ) {
            return FALSE;
        }

        STag tmp( next );
        return doStartTag( tmp, depth );
    }

    // See if document start tag is optional.
    if ( ( _dtd->start()->stagRequired() ) || ( _stack.count() != 0 ) ) {
        return FALSE;
    }

    STag tmp( _dtd->start()->name() );
    openTag( tmp );
    return TRUE;
}

bool SgmlParser::satisfied( int depth )
{
    if ( depth < 1 ) return FALSE;

    return _stack.at( depth - 1 )->state->accept();
}

// Returns TRUE when the content model state of the element at stack depth
// 'depth' has a next state for 'tag'.  A depth below 1 yields FALSE.
bool SgmlParser::fitsModel( QString tag, int depth )
{
    if ( depth >= 1 ) {
        State* entry = _stack.at( depth - 1 );
        return entry->state->nextState( tag ) != 0;
    }

    return FALSE;
}

void SgmlParser::closeTags( int depth )
{
    while ( int( _stack.count() ) > depth ) {
//        emit endTag( _stack.last()->element->name() );
        _ops.append( new OpEtag( _stack.last()->element->name(), _stack.last() ) );
        _stack.remove();
    }
}

// Queues a start-tag operation for 'tag' and pushes a new stack entry for
// its element.
//
// The operation records the content model state the enclosing element had
// before the tag (0 when nothing is open).  The enclosing element is then
// moved to its next state for this tag, if it has one, and the new element
// is pushed starting at the start state of its own content model.
void SgmlParser::openTag( STag& tag )
{
    State* parent = _stack.last();
    if ( parent ) {
        _ops.append( new OpStag( tag, parent->state ) );
    } else {
        _ops.append( new OpStag( tag, 0 ) );
    }

    // Move to the next state in the content model.
    if ( _stack.count() ) {
        parent = _stack.last();
        if ( parent->state->nextState( tag.name ) ) {
            parent->state = parent->state->nextState( tag.name );
        }
    }

    // Open the new tag.
    SgmlElement* element = _dtd->getElement( tag.name );
    _stack.append( new State( element, element->model().start() ) );
}

// Makes all queued operations permanent, oldest first.
//
//   Start tag: a copy of the tag is pushed on the tag stack, the style
//              obtained from the style sheet is pushed on the style stack,
//              and startTag() is emitted.
//   End tag:   endTag() is emitted, then the top entries of the tag stack
//              and the style stack are deleted, as is the parser stack entry
//              that was popped when the operation was queued.
//
// Each operation is deleted through its concrete type.  Op's declaration is
// not visible in this file; deleting via the derived pointer destroys the
// STag/QString members regardless of whether Op has a virtual destructor.
void SgmlParser::commit()
{
    for ( Op* op = _ops.first(); op; op = _ops.first() ) {
        if ( op->type == Op::Stag ) {
            OpStag* opStag = (OpStag*)op;
            console->debug( "SgmlParser::commit() -- start tag = <%s>", opStag->stag.name.data() );
            _tagStack.append( new STag( opStag->stag ) );
            _styleStack.append( _styleSheet->getStyle( _styleStack.last(), opStag->stag ) );
            emit startTag();
            _ops.remove();
            delete opStag;
        } else {
            OpEtag* opEtag = (OpEtag*)op;
            console->debug( "SgmlParser::commit() -- end tag = '</%s>", opEtag->etag.data() );
            emit endTag();
            delete _tagStack.last();
            _tagStack.remove();
            delete _styleStack.last();
            _styleStack.remove();
            delete opEtag->state;
            _ops.remove();
            delete opEtag;
        }
    }
}

// Undoes all queued operations, newest first, restoring the parser stack to
// what it was before they were queued.
//
//   Start tag: the stack entry pushed for it is deleted and the enclosing
//              element (if any) gets back the content model state recorded
//              in the operation.
//   End tag:   the stack entry popped for it is pushed back.
//
// Each operation is deleted through its concrete type.  Op's declaration is
// not visible in this file; deleting via the derived pointer destroys the
// STag/QString members regardless of whether Op has a virtual destructor.
void SgmlParser::rollback()
{
    for ( Op* op = _ops.last(); op; op = _ops.last() ) {
        if ( op->type == Op::Stag ) {
            OpStag* opStag = (OpStag*)op;
            delete _stack.last();
            _stack.removeLast();
            if ( _stack.last() ) {
                _stack.last()->state = opStag->state;
            }
            _ops.remove();
            delete opStag;
        } else {
            OpEtag* opEtag = (OpEtag*)op;
            _stack.append( opEtag->state );
            _ops.remove();
            delete opEtag;
        }
    }
}

void SgmlParser::logStack()
{
    QString stack;
    QListIterator<STag> i( _tagStack );
    for ( i.toFirst(); i.current(); ++i ) {
        stack += "/";
        stack += i.current()->name;
    }
    console->warning( "SgmlParser::logStack() -- tag stack = '%s'", stack.data() );
}
