/*
 * Copyright (c) 1997-1998 The Java Apache Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the Java Apache 
 *    Project for use in the Apache JServ servlet engine project
 *    (http://java.apache.org/)."
 *
 * 4. The names "Apache JServ", "Apache JServ Servlet Engine" and 
 *    "Java Apache Project" must not be used to endorse or promote products 
 *    derived from this software without prior written permission.
 *
 * 5. Products derived from this software may not be called "Apache JServ"
 *    nor may "Apache" nor "Apache JServ" appear in their names without 
 *    prior written permission of the Java Apache Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the Java Apache 
 *    Project for use in the Apache JServ servlet engine project
 *    (http://java.apache.org/)."
 *    
 * THIS SOFTWARE IS PROVIDED BY THE JAVA APACHE PROJECT "AS IS" AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE JAVA APACHE PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Java Apache Group. For more information
 * on the Java Apache Project and the Apache JServ Servlet Engine project,
 * please see <http://java.apache.org/>.
 *
 */

package org.apache.java.util;

import java.util.*;

/**
 * Convenient class for parsing SGML tokens from a page.
 *
 * <p>This class is optimized for speed, not ease of use.
 * (Though I'd contend it's fairly easy to use anyway!)
 * Tags are only read enough to find out what the tag name is;
 * the tag may be checked for completeness by calling isWellFormed().
 * This is done so that applications don't spend time processing
 * tags about which they care little.
 *
 * <p>Here's a sample piece of code which uses this class to read
 * all SGML tags on a page:
 *
 * <pre>
 * void showTags(PrintWriter out, String text)
 * {
 *      for (   SGMLTag tag = new SGMLTag(text, 0);
 *          !tag.finished();
 *          tag = new SGMLTag(text, tag.end))
 *          out.println("tag: " + tag.toString());
 * }
 * </pre>
 *
 * @author <a href="mailto:williams@ugsolutions.com">Tim Williams</a>
 * @version $Revision: 1.1.1.1 $ $Date: 1998/11/26 23:23:04 $
 */

public class SGMLTag {

    /**
     * Name of this SGML tag, in uppercase format.
     * For example, P for paragraph, B for bold, etc.
     * This value is left as null when no tag was found, or when
     * whitespace, a delimiter, the end of the text or an unterminated
     * quoted string was encountered where the tag name would be.
     * @see #isWellFormed
     */
    public String name = null;

    /**
     * Location on "page" (passed string) where this tag begins
     * (inclusive).  (This is the opening less-than sign.)
     * Set to -1 when no further tag could be found.
     * @see #finished
     */
    public int start;

    /**
     * Place on page where tag ends, <i>as far as this class knows</i>.
     * Until the attributes have been read (for example via
     * isWellFormed()), this is just past the tag name; afterwards it
     * is just past the last part of the tag that could be parsed.
     * This value is exclusive, i.e. the last character known to belong
     * to the tag is the one immediately before this index.
     * @see #isWellFormed
     */
    public int end;

    // private stuff
    private Vector attrs = null;            // attribute names, original case
    private Hashtable values = null;        // uppercase name -> value
    private boolean wellFormed = true;      // looks good?
    private String text;                    // text being scanned
    private int escapes = 0;                // escape chars in last token

    // comment delineation
    static final String COMMENT_START = "<!--", COMMENT_END = "-->";

    /**
     * Create new SGML tag reference, starting at given location.
     * At first, only the name of the tag is read.
     * Tag may not be well-formed: if interested, call
     * <tt>isWellFormed()</tt> (or call <tt>getAttributes()</tt> and
     * check for a non-null return value).
     * Note that this constructor skips over any HTML-style comments,
     * as denoted by matched <tt>&lt;!--</tt> ... <tt>--&gt;</tt> pairs.
     * If no further tag is present (or a comment is never closed),
     * the resulting object reports <tt>finished()</tt> as true.
     * @param text string being parsed for SGML tags (must not be null)
     * @param begin first character index to examine
     * @see #finished
     * @see #isWellFormed
     */
    public SGMLTag(String text, int begin) {
        // store data
        this.text = text;
        start = begin;

        // skipping over comments, find first tag
        while (true) {
            // find starting character of SGML tag
            start = text.indexOf('<', start);
            if (start == -1) {
                return; // no next tag: done
            }

            // not a comment? good, time to exit loop
            // (startsWith is safe even when '<' is near the end of text)
            if (!text.startsWith(COMMENT_START, start)) {
                break;
            }

            // otherwise skip extent of commented area
            start = text.indexOf(COMMENT_END, start + COMMENT_START.length());
            if (start == -1) {
                return; // no matching close: done
            }
            start += COMMENT_END.length(); // skip comment closing symbol
        }

        // move to start of next token
        end = start + 1;
        String token = nextToken(text, end);

        // a delimiter directly after '<' (as in "<<b>" or "<>") is not a
        // tag name; leaving it unconsumed lets the next scan find "<b>"
        if (token != null && !isDelimiter(token.charAt(0))) {
            end += token.length() + escapes;
            name = toUpper(token);
        }
    }

    /**
     * Check whether this tag indicates we're at the end of the list.
     * Note: The end tag is not usable as an SGML tag.
     * @return true if this tag represents end of tags, and is not usable
     */
    public boolean finished() {
        return start == -1 && name == null;
    }

    /**
     * Check name of tag.
     * (Comparison is case-insensitive.)
     * @param name tag name to compare against; null never matches
     * @return true if passed tag matches this one.
     */
    public boolean isNamed(String name) {
        return name != null
            && this.name != null
            && this.name.equals(toUpper(name));
    }

    /**
     * Check for well-formedness of this tag.
     * Note that calling this method causes rest of tag to be parsed.
     * @return true if tag is a well-formed SGML tag, false otherwise
     */
    public boolean isWellFormed() {
        if (name == null) {
            return false;
        }
        if (values == null) {
            readAttributes();
        }
        return wellFormed;
    }

    /**
     * Get list of attribute names.
     * @param upperCase true returns names in all uppercase (good for
     * case-insensitive applications), false returns attribute names
     * with same case as in original text (the spelling first seen is
     * kept when a name is repeated)
     * @return enumeration of attribute names specified as strings,
     * or null if this tag is poorly formed
     */
    public Enumeration attributes(boolean upperCase) {
        // check to make sure attributes have been read
        if (!readAttributes()) {
            return null;
        }

        // or return uppercase names?
        if (upperCase) {
            return values.keys();
        } else {
            return attrs.elements();
        }
    }

    /**
     * Get attribute value, or default if not set.
     * Case is ignored, <tt>value("a")</tt> will return the same
     * result as <tt>value("A")</tt>.  Note also that if you wish to
     * check whether value was set, you can pass <tt>null</tt>
     * as the defaultValue.
     * For a poorly formed tag, attributes that were parsed before the
     * problem was encountered are still available here.
     * @param attributeName attribute for which to check
     * @param defaultValue value if attribute unset
     * @return value of attribute, or defaultValue if not available
     */
    public String value(String attributeName, String defaultValue) {
        if (values == null) {
            readAttributes();
        }
        if (attributeName == null) {
            return defaultValue;
        }
        String value = (String) values.get(toUpper(attributeName));
        return value == null ? defaultValue : value;
    }

    /**
     * Attempt to read attributes from tag if not already read.
     * Attributes must be written as <tt>name=value</tt> with no
     * whitespace around the equals sign; anything else (including an
     * attribute without a value) marks the tag as poorly formed.
     * Values may be bare words or strings in single or double quotes;
     * the surrounding quotes are removed from the stored value.
     * @return true if everything was read fine, false otherwise
     */
    private boolean readAttributes() {
        if (values == null && wellFormed) {
            wellFormed = false;
            attrs = new Vector();
            values = new Hashtable();

            // the end-of-input marker has no position to parse from
            if (finished()) {
                return false;
            }

            while (true) {
                // check for valid value tag (or end delimiter)
                end = skipWhiteSpace(text, end);
                String key = nextToken(text, end);
                if (key == null) {
                    break;
                }
                if (key.equals(">")) {
                    wellFormed = true;
                    end++;
                    break;
                }
                if (isQuote(key.charAt(0)) || isDelimiter(key.charAt(0))) {
                    break;
                }

                // now insure that we have an equals sign
                end += key.length() + escapes;
                String token = nextToken(text, end);
                if (token == null || token.charAt(0) != '=') {
                    break;
                }

                // read value of tag
                end += 1;
                token = nextToken(text, end);
                if (token == null || isDelimiter(token.charAt(0))) {
                    break;
                }

                // escapes accounts for backslashes that were consumed from
                // the text but are not part of the returned token
                end += token.length() + escapes;
                if (isQuote(token.charAt(0))) { // strip quotes
                    token = token.substring(1, token.length() - 1);
                }

                // store attribute name with original case
                String upperCase = toUpper(key);
                if (!values.containsKey(upperCase)) {
                    attrs.addElement(key);
                }

                // store assignment in case-insensitive manner
                values.put(upperCase, token);
            }
        }
        return wellFormed && values != null;
    }

    /**
     * Return value of attribute (parameter) setting in SGML tag.
     * @param key name of attribute for which to check
     * @param defaultValue value if attribute unset
     * @deprecated use <tt>attributes()</tt> and <tt>value()</tt> instead
     * @see #attributes
     * @see #value
     * @return value of that attribute, or default if not defined
     */
    public String getAttribute(String key, String defaultValue) {
        return value(key, defaultValue);
    }

    /**
     * Return tag attributes and values.
     * Keys of the returned table are the attribute names in uppercase.
     * The table is the one used internally by this object, so callers
     * should not modify it.
     * @return parameter key / value pairs, or null if this tag is
     * poorly formed
     * @deprecated use <tt>attributes()</tt> and <tt>value()</tt> instead
     * @see #attributes
     * @see #value
     */
    public Hashtable getAttributes() {
        return readAttributes() ? values : null;
    }

    /**
     * Read next token from string.
     * A token is a word (ended by whitespace, a delimiter or the end
     * of the string), a string in single or double quotes (returned
     * with quotes), or a single delimiter such as a greater-than,
     * less-than, or equals sign.
     * Quote marks inside quoted strings may be escaped with a
     * backslash (\) character; the backslash itself is not included
     * in the returned token.
     * @param string string being parsed
     * @param index location within string to start examining
     * @return next token, or null if whitespace was encountered, if
     * index lies outside the string, or if a quoted string is not
     * terminated
     */
    public String nextToken(String string, int index) {
        // reset number of escape characters encountered (messy!)
        escapes = 0;

        final int length = string.length();
        if (index < 0 || index >= length) {
            return null;
        }

        char c = string.charAt(index);

        // quoted string? (handles both single and double quotes)
        if (isQuote(c)) {
            final char quote = c;
            StringBuffer token = new StringBuffer();
            token.append(quote);
            int backslashes = 0;
            int i = index + 1;
            while (i < length) {
                c = string.charAt(i++);
                if (c == '\\') {
                    if (i >= length) {
                        break; // dangling backslash: unterminated
                    }
                    // take the escaped character literally
                    token.append(string.charAt(i++));
                    backslashes++;
                } else {
                    token.append(c);
                    if (c == quote) {
                        escapes = backslashes;
                        return token.toString();
                    }
                }
            }
            return null; // closing quote never found
        }

        // parameter delimiter?
        if (isDelimiter(c)) {
            return String.valueOf(c);
        }

        // whitespace? give back a null
        if (Character.isWhitespace(c)) {
            return null;
        }

        // word token: runs up to whitespace, a delimiter or end of string
        int stop = index + 1;
        while (stop < length) {
            c = string.charAt(stop);
            if (Character.isWhitespace(c) || isDelimiter(c)) {
                break;
            }
            stop++;
        }
        return string.substring(index, stop);
    }

    /**
     * Increment index into string to pass over any white space
     * characters.
     * @param string string being examined
     * @param index current location within string
     * @return index incremented to be on first non-whitespace character,
     * or the length of the string if only whitespace remains
     */
    public static int skipWhiteSpace(String string, int index) {
        final int length = string.length();
        while (index < length
               && Character.isWhitespace(string.charAt(index))) {
            index++;
        }
        return index;
    }

    /**
     * Decide whether character is SGML delimiter or equals.
     * @param c character in question
     * @return true if character is an SGML delimiter
     */
    public static boolean isDelimiter(char c) {
        return c == '<' || c == '=' || c == '>';
    }

    /**
     * Decide whether character opens (and closes) a quoted string.
     * @param c character in question
     * @return true if character is a single or double quote
     */
    private static boolean isQuote(char c) {
        return c == '"' || c == '\'';
    }

    /**
     * Convert to uppercase independently of the default locale, so
     * that, for example, "img" becomes "IMG" under a Turkish locale too.
     * @param s string to convert
     * @return uppercase form of the string
     */
    private static String toUpper(String s) {
        return s.toUpperCase(Locale.ENGLISH);
    }

    /**
     * Render this tag as a string.
     * Note that calling this method causes rest of tag to be parsed.
     * @return SGML tag as string, showing range and values
     */
    public String toString() {
        readAttributes();
        StringBuffer str = new StringBuffer("[SGMLTag ");
        str.append(name).append(": (").append(start).append(",")
           .append(end).append(")");
        if (attrs != null && wellFormed) {
            Enumeration e = attributes(true);
            while (e.hasMoreElements()) {
                Object key = e.nextElement();
                str.append(" ").append(key).append("=\"")
                   .append(value((String) key, null)).append("\"");
            }
        } else {
            str.append(" *MALFORMED TAG*");
        }

        return str.append(" ]").toString();
    }
}

