package info.bliki.htmlcleaner;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.cli.HelpFormatter;

/* loaded from: input_file:info/bliki/htmlcleaner/HtmlTokenizer.class */
/**
 * Tokenizer that reads HTML text from the reader supplied by an {@link HtmlCleaner}
 * and turns it into tokens (tags, end tags, content, comments, doctype).
 *
 * <p>Input is consumed through a fixed-size sliding window ({@code _working}).
 * Every token produced is appended to the token list and the owning cleaner's
 * {@code makeTree} is invoked immediately with that list.
 */
public class HtmlTokenizer {
    /** Number of characters held in the sliding window over the input. */
    private static final int WORKING_BUFFER_SIZE = 1024;

    private BufferedReader _reader;

    /** Sliding window over the input; valid data starts at index 0. */
    private char[] _working = new char[WORKING_BUFFER_SIZE];

    /** Index of the current character inside {@code _working}. */
    private transient int _pos = 0;

    /**
     * {@code -1} while the end of the input has not been seen; afterwards the
     * number of valid characters in {@code _working}.
     */
    private transient int _len = -1;

    /** Raw text collected for the token currently being parsed. */
    private transient StringBuffer _saved = new StringBuffer(512);

    /** Set once a doctype or any tag has been seen; later doctypes are ignored. */
    private transient boolean _isLateForDoctype = false;

    /** Tag token currently being populated with attributes, or {@code null}. */
    private transient TagToken _currentTagToken = null;

    private transient List _tokenList = new ArrayList();

    /** Cleared by {@link #identifier()} when no identifier is found at the current position. */
    private boolean _asExpected = true;

    /** True between an opening {@code <script>} tag and its end tag. */
    private boolean _isScriptContext = false;

    /** True between an opening {@code <style>} tag and its end tag. */
    private boolean _isStyleContext = false;

    private HtmlCleaner cleaner;

    /**
     * Creates a tokenizer reading from {@code htmlCleaner.getReader()}.
     *
     * @param htmlCleaner cleaner that provides the input and receives the tokens
     * @throws IOException declared for callers; kept for interface compatibility
     */
    public HtmlTokenizer(HtmlCleaner htmlCleaner) throws IOException {
        this._reader = new BufferedReader(htmlCleaner.getReader());
        this.cleaner = htmlCleaner;
    }

    /**
     * Appends a token to the list and passes the list to the cleaner's
     * {@code makeTree}, with an iterator positioned just before the new token.
     */
    private void addToken(BaseToken baseToken) {
        this._tokenList.add(baseToken);
        this.cleaner.makeTree(this._tokenList, this._tokenList.listIterator(this._tokenList.size() - 1));
    }

    /**
     * Refills the window when fewer than {@code neededChars + 1} characters remain
     * in it and the end of the input has not been reached yet.
     *
     * <p>Unread characters are moved to the front of the window and the rest of
     * the window is filled from the reader. A single {@code read} call may return
     * fewer characters than requested without the stream being exhausted, so
     * reading is repeated until the window is full or {@code read} reports
     * end of stream ({@code -1}); only then is {@code _len} set.
     *
     * @param neededChars look-ahead, in characters, required beyond {@code _pos}
     * @throws IOException if reading fails
     */
    private void readIfNeeded(int neededChars) throws IOException {
        if (this._len != -1 || this._pos + neededChars < WORKING_BUFFER_SIZE) {
            return;
        }
        int carried = WORKING_BUFFER_SIZE - this._pos;
        System.arraycopy(this._working, this._pos, this._working, 0, carried);
        this._pos = 0;

        int filled = carried;
        boolean endOfInput = false;
        while (filled < WORKING_BUFFER_SIZE) {
            int read = this._reader.read(this._working, filled, WORKING_BUFFER_SIZE - filled);
            if (read < 0) {
                endOfInput = true;
                break;
            }
            filled += read;
        }
        if (endOfInput) {
            // Everything carried over plus everything read is valid; the -1
            // end-of-stream marker must not be added into the length.
            this._len = filled;
        }
    }

    /**
     * Returns the live list of tokens produced so far (not a copy).
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    public List getTokenList() {
        return this._tokenList;
    }

    /** Advances by one character, refilling the window if necessary. */
    private void go() throws IOException {
        this._pos++;
        readIfNeeded(0);
    }

    /** Advances by {@code step} characters, refilling the window if necessary. */
    private void go(int step) throws IOException {
        this._pos += step;
        readIfNeeded(step - 1);
    }

    /**
     * Tells whether the input at the current position starts with {@code str},
     * compared case-insensitively. Does not advance the position.
     */
    private boolean startsWith(String str) throws IOException {
        int length = str.length();
        readIfNeeded(length);
        if (this._len >= 0 && this._pos + length > this._len) {
            return false;
        }
        for (int i = 0; i < length; i++) {
            if (Character.toLowerCase(str.charAt(i)) != Character.toLowerCase(this._working[this._pos + i])) {
                return false;
            }
        }
        return true;
    }

    /** Tells whether the character at window index {@code index} exists and is whitespace. */
    private boolean isWhitespace(int index) {
        if (this._len < 0 || index < this._len) {
            return Character.isWhitespace(this._working[index]);
        }
        return false;
    }

    /** Tells whether the current character is whitespace. */
    private boolean isWhitespace() {
        return isWhitespace(this._pos);
    }

    /** Tells whether the character at window index {@code index} exists and equals {@code c}, ignoring case. */
    private boolean isChar(int index, char c) {
        return (this._len < 0 || index < this._len)
                && Character.toLowerCase(c) == Character.toLowerCase(this._working[index]);
    }

    /** Tells whether the current character equals {@code c}, ignoring case. */
    private boolean isChar(char c) {
        return isChar(this._pos, c);
    }

    /**
     * Tells whether the character at window index {@code index} may start an
     * identifier: a Unicode identifier start character or {@code ':'}.
     */
    private boolean isIdentifierStartChar(int index) {
        if (this._len >= 0 && index >= this._len) {
            return false;
        }
        char c = this._working[index];
        return Character.isUnicodeIdentifierStart(c) || ':' == c;
    }

    /** Tells whether the current character may start an identifier. */
    private boolean isIdentifierStartChar() {
        return isIdentifierStartChar(this._pos);
    }

    /**
     * Tells whether the current character may appear inside an identifier:
     * a Unicode identifier start character, a digit, or one of {@code : . -}.
     */
    private boolean isIdentifierChar() {
        if (this._len >= 0 && this._pos >= this._len) {
            return false;
        }
        char c = this._working[this._pos];
        return Character.isUnicodeIdentifierStart(c) || Character.isDigit(c) || ':' == c || '.' == c || '-' == c;
    }

    /** Tells whether the end of input is known and the position has reached it. */
    private boolean isAllRead() {
        return this._len >= 0 && this._pos >= this._len;
    }

    /** Appends {@code c} to the text saved for the current token. */
    private void save(char c) {
        this._saved.append(c);
    }

    /** Saves the current character, unless the input is exhausted. */
    private void saveCurrent() {
        if (isAllRead()) {
            return;
        }
        save(this._working[this._pos]);
    }

    /**
     * Saves up to {@code count} characters starting at the current position
     * without advancing. Stops early at the end of the input so that stale
     * window contents beyond {@code _len} are never copied.
     */
    private void saveCurrent(int count) throws IOException {
        readIfNeeded(count);
        int index = this._pos;
        while (count > 0 && (this._len < 0 || index < this._len)) {
            save(this._working[index]);
            index++;
            count--;
        }
    }

    /** Saves and skips consecutive whitespace characters. */
    private void skipWhitespaces() throws IOException {
        while (!isAllRead() && isWhitespace()) {
            saveCurrent();
            go();
        }
    }

    /** Emits the saved text, if any, as a {@code ContentToken} and clears it. */
    private void addSavedAsContent() {
        if (this._saved.length() > 0) {
            addToken(new ContentToken(this._saved.toString()));
            this._saved.delete(0, this._saved.length());
        }
    }

    /**
     * Resets the tokenizer state, tokenizes the whole input and closes the reader.
     * The reader is closed even when tokenizing fails.
     *
     * @throws IOException if reading from or closing the reader fails
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    public void start() throws IOException {
        this._currentTagToken = null;
        this._tokenList.clear();
        this._asExpected = true;
        this._isScriptContext = false;
        this._isStyleContext = false;
        this._isLateForDoctype = false;
        // Placing the position at the end of the window forces the first fill.
        this._pos = WORKING_BUFFER_SIZE;
        try {
            readIfNeeded(0);
            while (!isAllRead()) {
                this._saved.delete(0, this._saved.length());
                this._currentTagToken = null;
                this._asExpected = true;
                readIfNeeded(10);
                if (this._isScriptContext) {
                    // Inside <script> only the matching end tag is treated as markup.
                    if (startsWith("</script") && (isWhitespace(this._pos + 8) || isChar(this._pos + 8, '>'))) {
                        tagEnd();
                    } else {
                        content();
                    }
                } else if (this._isStyleContext) {
                    // Inside <style> only the matching end tag is treated as markup.
                    if (startsWith("</style") && (isWhitespace(this._pos + 7) || isChar(this._pos + 7, '>'))) {
                        tagEnd();
                    } else {
                        content();
                    }
                } else if (startsWith("<!doctype")) {
                    if (this._isLateForDoctype) {
                        ignore();
                    } else {
                        doctype();
                        this._isLateForDoctype = true;
                    }
                } else if (startsWith("</") && isIdentifierStartChar(this._pos + 2)) {
                    this._isLateForDoctype = true;
                    tagEnd();
                } else if (startsWith("<!--")) {
                    comment();
                } else if (startsWith("<") && isIdentifierStartChar(this._pos + 1)) {
                    this._isLateForDoctype = true;
                    tagStart();
                } else if (startsWith("<![")) {
                    this._isLateForDoctype = true;
                    skipMarkedSection();
                } else {
                    content();
                }
            }
        } finally {
            this._reader.close();
        }
    }

    /**
     * Skips a {@code <![ ... ]>} section without producing a token.
     * Scanning stops after the first {@code ]>} (which also terminates
     * {@code ]]>}) or at the end of the input, whichever comes first.
     */
    private void skipMarkedSection() throws IOException {
        while (!isAllRead()) {
            if (startsWith("]>")) {
                go(2);
                return;
            }
            go();
        }
    }

    /**
     * Parses an opening tag starting at {@code '<'}: name, attributes and the
     * closing {@code '>'} or {@code "/>"}. A self-closing tag additionally emits
     * an {@code EndTagToken}. Opening {@code script}/{@code style} tags closed
     * with {@code '>'} switch the tokenizer into the corresponding raw context.
     */
    private void tagStart() throws IOException {
        saveCurrent();
        go();
        if (isAllRead()) {
            return;
        }
        String tagName = identifier();
        this._currentTagToken = new TagNode(tagName);
        if (!this._asExpected) {
            // No valid tag name: emit what was consumed as plain content.
            addSavedAsContent();
            return;
        }
        skipWhitespaces();
        tagAttributes();
        String originalSource = this._saved.toString();
        addToken(this._currentTagToken);
        if (isChar('>')) {
            go();
            if ("script".equalsIgnoreCase(tagName)) {
                this._isScriptContext = true;
            } else if ("style".equalsIgnoreCase(tagName)) {
                this._isStyleContext = true;
            }
            originalSource = originalSource + ">";
        } else if (startsWith("/>")) {
            go(2);
            addToken(new EndTagToken(tagName));
            originalSource = originalSource + "/>";
        }
        this._currentTagToken.setOriginalSource(originalSource);
        this._currentTagToken = null;
    }

    /**
     * Parses an end tag starting at {@code "</"} and emits an {@code EndTagToken}.
     * An end tag named {@code script} or {@code style} leaves the corresponding
     * raw context.
     */
    private void tagEnd() throws IOException {
        saveCurrent(2);
        go(2);
        if (isAllRead()) {
            return;
        }
        String tagName = identifier();
        this._currentTagToken = new EndTagToken(tagName);
        if (!this._asExpected) {
            // No valid tag name: emit what was consumed as plain content.
            addSavedAsContent();
            return;
        }
        skipWhitespaces();
        tagAttributes();
        String originalSource = this._saved.toString();
        addToken(this._currentTagToken);
        if (isChar('>')) {
            go();
            originalSource = originalSource + ">";
        }
        if ("script".equalsIgnoreCase(tagName)) {
            this._isScriptContext = false;
        } else if ("style".equalsIgnoreCase(tagName)) {
            this._isStyleContext = false;
        }
        this._currentTagToken.setOriginalSource(originalSource);
        this._currentTagToken = null;
    }

    /**
     * Reads an identifier at the current position, saving its characters.
     *
     * @return the identifier, or {@code null} (with {@code _asExpected} cleared)
     *         when the current character cannot start an identifier
     */
    private String identifier() throws IOException {
        this._asExpected = true;
        if (!isIdentifierStartChar()) {
            this._asExpected = false;
            return null;
        }
        StringBuffer name = new StringBuffer(16);
        while (!isAllRead() && isIdentifierChar()) {
            saveCurrent();
            name.append(this._working[this._pos]);
            go();
        }
        return name.toString();
    }

    /**
     * Parses attributes of the current tag token until {@code '>'}, {@code "/>"},
     * a stray {@code '<'} or the end of the input. An attribute without
     * {@code '='} gets its own name as value.
     */
    private void tagAttributes() throws IOException {
        while (!isAllRead() && this._asExpected && !isChar('>') && !startsWith("/>")) {
            skipWhitespaces();
            String attName = identifier();
            if (this._asExpected) {
                String attValue = attName;
                skipWhitespaces();
                if (isChar('=')) {
                    saveCurrent();
                    go();
                    attValue = attributeValue();
                }
                if (this._asExpected) {
                    this._currentTagToken.addAttribute(attName, attValue, false);
                }
            } else {
                // Not an identifier: skip one unexpected character and retry,
                // unless a new tag starts here ('<'), which ends the loop.
                if (!isChar('<') && !isChar('>') && !startsWith("/>")) {
                    saveCurrent();
                    go();
                }
                if (!isChar('<')) {
                    this._asExpected = true;
                }
            }
        }
    }

    /**
     * Reads an attribute value that may be single-quoted, double-quoted or
     * unquoted. Quotes are consumed but not included in the result.
     *
     * @return the value; an empty string when the value is missing
     */
    private String attributeValue() throws IOException {
        skipWhitespaces();
        if (isChar('<') || isChar('>') || startsWith("/>")) {
            return "";
        }
        boolean doubleQuoted = false;
        boolean singleQuoted = false;
        StringBuffer value = new StringBuffer();
        if (isChar('\'')) {
            singleQuoted = true;
            saveCurrent();
            go();
        } else if (isChar('\"')) {
            doubleQuoted = true;
            saveCurrent();
            go();
        }
        // Quoted values run to the matching quote; unquoted values run to
        // whitespace, '>' or "/>".
        while (!isAllRead()
                && ((singleQuoted && !isChar('\''))
                    || (doubleQuoted && !isChar('\"'))
                    || (!singleQuoted && !doubleQuoted && !isWhitespace() && !isChar('>') && !startsWith("/>")))) {
            value.append(this._working[this._pos]);
            saveCurrent();
            go();
        }
        if (isChar('\'') && singleQuoted) {
            saveCurrent();
            go();
        } else if (isChar('\"') && doubleQuoted) {
            saveCurrent();
            go();
        }
        return value.toString();
    }

    /**
     * Consumes text up to the next {@code '<'} (always at least one character)
     * and emits it as content. Text consisting only of whitespace is collapsed
     * to a single space.
     */
    private void content() throws IOException {
        while (!isAllRead()) {
            saveCurrent();
            go();
            if (isChar('<')) {
                break;
            }
        }
        if (this._saved.length() > 0) {
            if (this._saved.toString().trim().length() == 0) {
                this._saved.delete(0, this._saved.length());
                this._saved.append(' ');
            }
            addSavedAsContent();
        }
    }

    /** Skips input, without saving it, up to the next {@code '<'} or the end of the input. */
    private void ignore() throws IOException {
        while (!isAllRead()) {
            go();
            if (isChar('<')) {
                return;
            }
        }
    }

    /**
     * Parses a comment starting at {@code "<!--"}. Unless the cleaner omits
     * comments, a {@code CommentToken} is emitted in which every {@code "--"}
     * and a leading or trailing {@code '-'} is replaced using the cleaner's
     * hyphen replacement. Empty comments produce no token.
     */
    private void comment() throws IOException {
        go(4);
        while (!isAllRead() && !startsWith("-->")) {
            saveCurrent();
            go();
        }
        if (startsWith("-->")) {
            go(3);
        }
        if (this._saved.length() > 0) {
            if (!this.cleaner.isOmitComments()) {
                String hyphenReplacement = this.cleaner.getHyphenReplacementInComment();
                // Literal replacement: the configured text must not be
                // interpreted as regex replacement syntax ('$', '\').
                String text = this._saved.toString().replace("--", hyphenReplacement + hyphenReplacement);
                if (text.length() > 0 && text.charAt(0) == '-') {
                    text = hyphenReplacement + text.substring(1);
                }
                int length = text.length();
                if (length > 0 && text.charAt(length - 1) == '-') {
                    text = text.substring(0, length - 1) + hyphenReplacement;
                }
                addToken(new CommentToken(text));
            }
            this._saved.delete(0, this._saved.length());
        }
    }

    /**
     * Parses a {@code <!doctype ...>} declaration: two identifiers followed by
     * two (optionally quoted) values. The resulting {@code DoctypeToken} is
     * handed to the cleaner only if it reports itself valid.
     */
    private void doctype() throws IOException {
        go(9);
        skipWhitespaces();
        String part1 = identifier();
        skipWhitespaces();
        String part2 = identifier();
        skipWhitespaces();
        String part3 = attributeValue();
        skipWhitespaces();
        String part4 = attributeValue();
        ignore();
        DoctypeToken doctypeToken = new DoctypeToken(part1, part2, part3, part4);
        if (doctypeToken.isValid()) {
            this.cleaner.setDoctype(doctypeToken);
        }
    }
}
