// ====================================================================
// Copyright (c) 1997, 1998 The Apache Group.  All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer. 
//
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in
//    the documentation and/or other materials provided with the
//    distribution.
//
// 3. All advertising materials mentioning features or use of this
//    software must display the following acknowledgment:
//    "This product includes software developed by the Apache Group
//    for use in the Apache HTTP server project (http://www.apache.org/)."
//
// 4. The names "Apache Server" and "Apache Group" must not be used to
//    endorse or promote products derived from this software without
//    prior written permission.
//
// 5. Redistributions of any form whatsoever must retain the following
//    acknowledgment:
//    "This product includes software developed by the Apache Group
//    for use in the Apache HTTP server project (http://www.apache.org/)."
//
// THIS SOFTWARE IS PROVIDED BY THE APACHE GROUP ``AS IS'' AND ANY
// EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE APACHE GROUP OR
// ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
// OF THE POSSIBILITY OF SUCH DAMAGE.
// ====================================================================

// TODO:
// (from williams@ugsolutions.com:)
// - Figure out how to make this compile in util, not jserv

package org.apache.jserv;

import java.util.Hashtable;
import java.util.Enumeration;

/**
 * Convenient class for parsing SGML tokens from a page.
 * <p>This class is optimized for speed, not ease of use.
 * (Though I'd contend it's fairly easy to use anyway!)
 * Tags are only read enough to find out what the tag name is;
 * the tag may be checked for completeness by calling isWellFormed().
 * This is done so that applications don't spend time processing
 * tags about which they care little.
 * <p>Here's a sample piece of code which uses this class to read
 * all SGML tags on a page:
 * <pre>
 *  void showTags(PrintWriter out, String text)
 *  {
 *      for (   SGMLTag tag = new SGMLTag(text, 0);
 *              !tag.finished();
 *              tag = new SGMLTag(text, tag.end))
 *          out.println("tag: " + tag.toString());
 *  }
 * </pre>
 * @author Tim Williams
 */

public class SGMLTag
{
    /**
     * Name of this SGML tag, in uppercase format.
     * For example, P for paragraph, B for bold, etc.
     * This value is set to null when whitespace or another
     * problem (such as end of text) was encountered where the
     * tag name would be.
     * @see #isWellFormed
     */
    public String name = null;

    /**
     * Location on "page" (passed string) where this tag begins
     * (inclusive).  (This is the opening less-than sign.)
     * Set to -1 when no further tag was found.
     */
    public int start;

    /**
     * Place on page where tag ends, <i>as far as this class
     * knows</i>.
     * Until the tag's attributes have been read (via getAttributes()
     * or isWellFormed()), this only covers the tag name.
     * This value is exclusive, e.g. the last character examined
     * so far is the one just before this index.
     * @see #isWellFormed
     */
    public int end;

    // private stuff
    private Hashtable params = null;            // tag attributes
    private boolean wellFormed = true;          // looks good?
    private String text;                        // text being scanned

    /**
     * Create new SGML tag reference, starting at given location.
     * At first, only the type of tag (first argument) is read.
     * Tag may not be well-formed: if interested, call "getAttributes"
     * and check for non-null return value to insure well-formed tag.
     * If no more tags are present, finished() returns true.
     * @param text string being parsed for SGML tags
     * @param begin first character index to examine
     * @see #getAttributes
     * @see #finished
     */
    public SGMLTag(String text, int begin)
    {
        search(text, begin);
    }

    /**
     * Check whether this tag indicates we're at end of the list.
     * Note: The end tag is not usable as an SGML tag.
     * @return true if tag represents end of tags and isn't usable
     */
    public boolean finished()
    {
        return start == -1 && name == null;
    }

    /**
     * Check name of tag.
     * (Comparison is case-insensitive.)
     * @param name tag name to compare against; null never matches
     * @return true if passed tag matches this one.
     */
    public boolean isNamed(String name)
    {
        return (name != null && this.name != null &&
                this.name.equals(name.toUpperCase()));
    }

    /**
     * Check for well-formedness of this tag.
     * Note that calling this method causes rest of tag to be parsed.
     * @return true if tag is a well-formed SGML tag, false otherwise
     */
    public boolean isWellFormed()
    {
        if (name == null) return false;
        if (params == null) getAttributes();
        return wellFormed;
    }

    /**
     * Return value of attribute (parameter) setting in SGML tag.
     * Attribute names are stored in uppercase, so the key is
     * upper-cased before the lookup.
     * For a malformed tag, attributes read before the problem was
     * encountered can still be looked up.
     * @param key name of attribute for which to check
     * @param defaultValue value if attribute unset
     * @return value of that attribute, or default if not defined
     */
    public String getAttribute(String key, String defaultValue)
    {
        if (params == null) getAttributes();
        // Hashtable rejects null keys, so answer those directly
        if (key == null || params == null) return defaultValue;
        String value = (String) params.get(key.toUpperCase());
        return value == null ? defaultValue : value;
    }

    /**
     * Return tag attributes and values.
     * The first call parses the rest of the tag and moves "end"
     * forward; later calls return the cached result.
     * @return parameter key / value pairs (keys in uppercase),
     * or null if the tag is not well-formed
     */
    public Hashtable getAttributes()
    {
        if (params == null && wellFormed) {
            wellFormed = false;
            params = new Hashtable();
            // a tag without a name is never well-formed (see
            // isWellFormed), so don't try to read its attributes
            if (name != null) parseAttributes();
        }
        if (!wellFormed) return null;
        else return params;
    }

    /**
     * Read "key=value" pairs starting at "end" until the closing
     * greater-than sign is found (tag is well-formed) or something
     * unexpected is met (tag is left marked as malformed).
     */
    private void parseAttributes()
    {
        StringBuffer token = new StringBuffer();
        while (true)
        {
            // check for valid attribute name (or end delimiter)
            end = skipWhiteSpace(text, end);
            token.setLength(0);
            int next = scanToken(text, end, token);
            if (next == -1) return;
            String key = token.toString();
            if (key.equals(">")) {
                wellFormed = true;
                end = next;
                return;
            }
            if (key.charAt(0) == '"' || isDelimiter(key.charAt(0)))
                return;
            end = next;

            // now insure that we have an equals sign
            token.setLength(0);
            next = scanToken(text, end, token);
            if (next == -1 || token.charAt(0) != '=')
                return;
            end = next;

            // read value of attribute
            token.setLength(0);
            next = scanToken(text, end, token);
            if (next == -1 || isDelimiter(token.charAt(0)))
                return;
            // advance by the characters actually consumed: this may
            // exceed the token length when the value had escapes
            end = next;
            String value = token.toString();
            if (value.charAt(0) == '"') // strip quotes
                value = value.substring(1, value.length() - 1);

            // store assignment
            params.put(key.toUpperCase(), value);
        }
    }

    /**
     * Read next token from string.
     * A token is a space-delimited word, a string in quotes
     * (returned with quotes, backslash escapes resolved), or a
     * delimiter such as a greater-than, less-than, or equals sign.
     * @param string string being parsed
     * @param index location within string to start examining
     * @return next token, or null if whitespace, the end of the
     * string, or an unterminated quoted string was encountered
     */
    public static String nextToken(String string, int index)
    {
        StringBuffer token = new StringBuffer();
        if (scanToken(string, index, token) == -1) return null;
        return token.toString();
    }

    /**
     * Scan one token, appending its text to the passed buffer.
     * @param string string being parsed
     * @param index location within string to start examining
     * @param token buffer receiving the token text
     * @return index just past the token in the string, or -1 if
     * there is no token at the given location
     */
    private static int scanToken(String string, int index,
                                 StringBuffer token)
    {
        int length = string.length();
        if (index < 0 || index >= length) return -1;
        char c = string.charAt(index);

        // quoted string?
        if (c == '"') {
            token.append(c);
            int i = index + 1;
            while (i < length) {
                c = string.charAt(i++);
                if (c == '\\') {
                    // escaped character is taken literally
                    if (i >= length) return -1;
                    token.append(string.charAt(i++));
                }
                else {
                    token.append(c);
                    if (c == '"') return i;
                }
            }
            return -1;      // closing quote never found
        }

        // parameter delimiter?
        if (isDelimiter(c)) {
            token.append(c);
            return index + 1;
        }

        // whitespace is not a token
        if (isWhiteSpace(c)) return -1;

        // word token: runs up to whitespace, delimiter or end of text
        int i = index;
        while (i < length) {
            c = string.charAt(i);
            if (isWhiteSpace(c) || isDelimiter(c)) break;
            token.append(c);
            i++;
        }
        return i;
    }

    /**
     * Increment index into string to pass over any white space
     * characters.
     * @param string string being examined
     * @param index current location within string
     * @return index incremented to be on first non-whitespace char,
     * or the length of the string if only whitespace remains
     */
    public static int skipWhiteSpace(String string, int index)
    {
        int length = string.length();
        while (index < length && isWhiteSpace(string.charAt(index)))
            index++;
        return index;
    }

    /**
     * Decide whether character is white space.
     * @param c character in question
     * @return true if character is a white space character
     */
    public static boolean isWhiteSpace(char c)
    {
        return Character.isWhitespace(c);
    }

    /**
     * Decide whether character is SGML delimiter or equals.
     * @param c character in question
     * @return true if character is an SGML delimiter
     */
    public static boolean isDelimiter(char c)
    {
        return c == '<' || c == '=' || c == '>';
    }

    /**
     * Render this tag as a string.
     * Note that this causes the rest of the tag to be parsed.
     * @return SGML tag as string, showing range and values
     */
    public String toString()
    {
        Hashtable attrs = getAttributes();
        StringBuffer str = new StringBuffer("[SGMLTag ");
        str.append(name).append(": (").append(start);
        str.append(",").append(end).append(")");
        if (attrs != null && wellFormed) {
            Enumeration e = attrs.keys();
            while (e.hasMoreElements()) {
                Object key = e.nextElement();
                str.append(" ").append(key).append("=\"");
                str.append(attrs.get(key)).append("\"");
            }
        }
        else str.append(" *MALFORMED TAG*");
        return str.append(" ]").toString();
    }

    /**
     * Locate the next tag at or after the given location, skipping
     * over comments, and read its name.
     * Sets "start" to -1 and leaves "name" null when no tag is
     * found (including when a comment is never closed).
     * @param text string being parsed for SGML tags
     * @param begin first character index to examine
     * @see #getAttributes
     */
    private void search(String text, int begin)
    {
        this.text = text;

        // find starting character, passing over any comments
        int from = begin;
        while (true) {
            start = text.indexOf('<', from);
            if (start == -1) return;

            // a real tag, not a comment?
            if (!text.startsWith("<!--", start)) break;

            // find the end of the comment
            int close = text.indexOf("-->", start);
            if (close == -1) {
                // comment runs to end of text: nothing more to find
                start = -1;
                return;
            }

            // try again just past the end of the comment
            from = close + 3;
        }

        // read the tag name
        end = start + 1;
        name = nextToken(text, end);
        if (name != null) {
            end += name.length();
            name = name.toUpperCase();
        }
    }
}

