package com.oracle.truffle.regex.tregex.parser.flavors;

import com.oracle.truffle.api.CompilerDirectives;
import com.oracle.truffle.regex.AbstractRegexObject;
import com.oracle.truffle.regex.RegexFlags;
import com.oracle.truffle.regex.RegexLanguage;
import com.oracle.truffle.regex.RegexSource;
import com.oracle.truffle.regex.RegexSyntaxException;
import com.oracle.truffle.regex.charset.ClassSetContents;
import com.oracle.truffle.regex.charset.CodePointSet;
import com.oracle.truffle.regex.charset.CodePointSetAccumulator;
import com.oracle.truffle.regex.charset.Constants;
import com.oracle.truffle.regex.errors.PyErrorMessages;
import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer;
import com.oracle.truffle.regex.tregex.parser.RegexASTBuilder;
import com.oracle.truffle.regex.tregex.parser.RegexParser;
import com.oracle.truffle.regex.tregex.parser.Token;
import com.oracle.truffle.regex.tregex.parser.ast.Group;
import com.oracle.truffle.regex.tregex.parser.ast.LookBehindAssertion;
import com.oracle.truffle.regex.tregex.parser.ast.RegexAST;
import com.oracle.truffle.regex.tregex.parser.ast.RegexASTNode;
import com.oracle.truffle.regex.tregex.parser.ast.RegexASTRootNode;
import com.oracle.truffle.regex.tregex.parser.ast.RegexASTSubtreeRootNode;
import java.util.ArrayList;
import java.util.EnumSet;

/* loaded from: input_file:BOOT-INF/lib/regex-24.2.0.jar:com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexParser.class */
/**
 * Parser for the Python regular expression flavor.
 *
 * <p>Tokens are pulled from a {@link PythonRegexLexer} and translated, one token at a time, into
 * calls on a {@link RegexASTBuilder}. The only state this class keeps on its own is the pair of
 * accumulators used while a bracketed character class ({@code [...]}) is being assembled.
 */
public final class PythonRegexParser implements RegexParser {

    /** Token kinds that are allowed to directly precede a quantifier. */
    private static final EnumSet<Token.Kind> QUANTIFIER_PREV = EnumSet.of(
                    Token.Kind.literalChar,
                    Token.Kind.charClass,
                    Token.Kind.charClassEnd,
                    Token.Kind.groupEnd,
                    Token.Kind.backReference);

    /** Code point of the line feed character used when expanding {@code ^} and {@code $}. */
    private static final int NEWLINE = '\n';

    private final PythonREMode mode;
    private final PythonRegexLexer lexer;
    private final RegexASTBuilder astBuilder;
    /** Members of the character class currently being parsed that are taken as-is. */
    private final CodePointSetAccumulator curCharClass = new CodePointSetAccumulator();
    /** Members of the current character class that are still subject to case folding. */
    private final CodePointSetAccumulator curCharClassCaseClosure = new CodePointSetAccumulator();

    /**
     * Creates a parser for the given source.
     *
     * @param regexLanguage language instance handed to the AST builder
     * @param regexSource the pattern, its options and its encoding
     * @param compilationBuffer scratch buffer shared by the lexer and the AST builder
     * @throws RegexSyntaxException declared for callers; raised by the collaborators if the source
     *             is rejected during set-up
     */
    public PythonRegexParser(RegexLanguage regexLanguage, RegexSource regexSource, CompilationBuffer compilationBuffer) throws RegexSyntaxException {
        this.mode = PythonREMode.fromEncoding(regexSource.getEncoding());
        this.lexer = new PythonRegexLexer(regexSource, this.mode, compilationBuffer);
        this.astBuilder = new RegexASTBuilder(regexLanguage, regexSource, createECMAScriptFlags(regexSource), false, compilationBuffer);
    }

    /**
     * Builds the {@link RegexFlags} handed to the AST builder: {@code dotAll} and {@code unicode}
     * are always set, {@code sticky} is set for the {@code match} and {@code fullmatch} matching
     * modes.
     */
    private static RegexFlags createECMAScriptFlags(RegexSource regexSource) {
        MatchingMode matchingMode = regexSource.getOptions().getMatchingMode();
        boolean sticky = matchingMode == MatchingMode.match || matchingMode == MatchingMode.fullmatch;
        return RegexFlags.builder().dotAll(true).unicode(true).sticky(sticky).build();
    }

    /** Returns the flags the lexer currently reports as locally active. */
    private PythonFlags getLocalFlags() {
        return this.lexer.getLocalFlags();
    }

    /** Returns the global flags collected by the lexer. */
    @Override
    public PythonFlags getFlags() {
        return this.lexer.getGlobalFlags();
    }

    /** Returns the named capture groups seen by the lexer, wrapped as a regex object. */
    @Override
    public AbstractRegexObject getNamedCaptureGroups() {
        return AbstractRegexObject.createNamedCaptureGroupMapInt(this.lexer.getNamedCaptureGroups());
    }

    /**
     * Parses the whole pattern and returns the resulting AST.
     *
     * <p>In {@code fullmatch} mode the pattern body is wrapped in an extra group which is followed
     * by a dollar assertion once all tokens have been consumed.
     *
     * @return the AST produced by the builder
     * @throws RegexSyntaxException if the pattern is malformed (bad quantifier placement,
     *             unbalanced parentheses, invalid group reference, misplaced global flags, ...)
     */
    @Override
    @CompilerDirectives.TruffleBoundary
    public RegexAST parse() throws RegexSyntaxException {
        this.astBuilder.pushRootGroup(true);
        boolean fullMatch = this.lexer.source.getOptions().getMatchingMode() == MatchingMode.fullmatch;
        if (fullMatch) {
            this.astBuilder.pushGroup();
        }
        // Conditional back-references are re-validated after parsing, once the total number of
        // capture groups is known.
        ArrayList<Token.BackReference> conditionalBackReferences = new ArrayList<>();
        Token token = null;
        while (this.lexer.hasNext()) {
            Token prevToken = token;
            Token.Kind prevKind = prevToken == null ? null : prevToken.kind;
            token = this.lexer.next();
            switch (token.kind) {
                case A:
                case Z:
                    this.astBuilder.addPositionAssertion(token);
                    break;
                case caret:
                    parseCaret(token, prevKind);
                    break;
                case dollar:
                    parseDollar(prevKind);
                    break;
                case wordBoundary:
                    parseWordBoundary(prevKind);
                    break;
                case nonWordBoundary:
                    parseNonWordBoundary(prevKind);
                    break;
                case backReference:
                    Token.BackReference backReference = (Token.BackReference) token;
                    verifyGroupReference(backReference);
                    this.astBuilder.addBackReference(backReference, getLocalFlags().isIgnoreCase());
                    break;
                case quantifier:
                    parseQuantifier((Token.Quantifier) token, prevKind);
                    break;
                case alternation:
                    parseAlternation();
                    break;
                case captureGroupBegin:
                    this.astBuilder.pushCaptureGroup(token);
                    break;
                case nonCaptureGroupBegin:
                    this.astBuilder.pushGroup(token);
                    break;
                case atomicGroupBegin:
                    this.astBuilder.pushAtomicGroup(token);
                    break;
                case lookAheadAssertionBegin:
                    this.astBuilder.pushLookAheadAssertion(token, ((Token.LookAheadAssertionBegin) token).isNegated());
                    break;
                case lookBehindAssertionBegin:
                    this.astBuilder.pushLookBehindAssertion(token, ((Token.LookBehindAssertionBegin) token).isNegated());
                    break;
                case groupEnd:
                    parseGroupEnd(token);
                    break;
                case literalChar:
                    literalChar(((Token.LiteralCharacter) token).getCodePoint());
                    break;
                case charClass:
                    this.astBuilder.addCharClass((Token.CharacterClass) token);
                    break;
                case charClassBegin:
                    this.curCharClass.clear();
                    this.curCharClassCaseClosure.clear();
                    break;
                case charClassAtom:
                    parseCharClassAtom((Token.CharacterClassAtom) token);
                    break;
                case charClassEnd:
                    parseCharClassEnd();
                    break;
                case conditionalBackreference:
                    Token.BackReference conditionalReference = (Token.BackReference) token;
                    verifyGroupReference(conditionalReference);
                    conditionalBackReferences.add(conditionalReference);
                    this.astBuilder.pushConditionalBackReferenceGroup(conditionalReference);
                    break;
                case inlineFlags:
                    parseInlineFlags((Token.InlineFlags) token, prevToken);
                    break;
            }
        }
        if (fullMatch) {
            this.astBuilder.popGroup();
            this.astBuilder.addDollar();
        }
        if (!this.astBuilder.curGroupIsRoot()) {
            throw syntaxErrorAtAbs("missing ), unterminated subpattern", this.astBuilder.getCurGroupStartPosition(), RegexSyntaxException.ErrorCode.UnmatchedParenthesis);
        }
        RegexAST ast = this.astBuilder.popRootGroup();
        for (Token.BackReference conditionalReference : conditionalBackReferences) {
            assert conditionalReference.getGroupNumbers().length == 1;
            int groupNumber = conditionalReference.getGroupNumbers()[0];
            if (groupNumber >= ast.getNumberOfCaptureGroups()) {
                // NOTE(review): the +3 offset presumably skips the "(?(" prefix of the token - confirm.
                throw syntaxErrorAtAbs(PyErrorMessages.invalidGroupReference(Integer.toString(groupNumber)), conditionalReference.getPosition() + 3, RegexSyntaxException.ErrorCode.InvalidBackReference);
            }
        }
        this.lexer.fixFlags();
        return ast;
    }

    /**
     * Handles a caret token. A caret directly following another caret adds nothing. With the
     * multi-line flag the caret becomes a two-alternative group, roughly {@code (?:^|(?<=\n))};
     * otherwise it is added as a plain position assertion.
     */
    private void parseCaret(Token token, Token.Kind prevKind) {
        if (prevKind == Token.Kind.caret) {
            return;
        }
        if (getLocalFlags().isMultiLine()) {
            this.astBuilder.pushGroup();
            this.astBuilder.addCaret();
            this.astBuilder.nextSequence();
            this.astBuilder.pushLookBehindAssertion(false);
            this.astBuilder.addCharClass(CodePointSet.create(NEWLINE));
            this.astBuilder.popGroup(); // closes the look-behind
            this.astBuilder.popGroup(); // closes the wrapping group
        } else {
            this.astBuilder.addPositionAssertion(token);
        }
    }

    /**
     * Handles a dollar token. A dollar directly following another dollar adds nothing. Otherwise
     * a two-alternative group is emitted: roughly {@code (?:$|(?=\n))} with the multi-line flag
     * and {@code (?:$|(?=\n$))} without it.
     */
    private void parseDollar(Token.Kind prevKind) {
        if (prevKind == Token.Kind.dollar) {
            return;
        }
        this.astBuilder.pushGroup();
        this.astBuilder.addDollar();
        this.astBuilder.nextSequence();
        this.astBuilder.pushLookAheadAssertion(false);
        this.astBuilder.addCharClass(CodePointSet.create(NEWLINE));
        if (!getLocalFlags().isMultiLine()) {
            this.astBuilder.addDollar();
        }
        this.astBuilder.popGroup(); // closes the look-ahead
        this.astBuilder.popGroup(); // closes the wrapping group
    }

    /**
     * Handles a word-boundary token. A repeated word boundary adds nothing; a word boundary right
     * after a non-word boundary replaces the current term with a dead node. Otherwise the
     * assertion is added with the word/non-word sets selected by the unicode and locale flags.
     */
    private void parseWordBoundary(Token.Kind prevKind) {
        if (prevKind == Token.Kind.wordBoundary) {
            return;
        }
        if (prevKind == Token.Kind.nonWordBoundary) {
            this.astBuilder.replaceCurTermWithDeadNode();
        } else if (getLocalFlags().isUnicode(this.mode)) {
            this.astBuilder.addWordBoundaryAssertion(this.lexer.getPredefinedCharClass('w'), this.lexer.getPredefinedCharClass('W'));
        } else if (getLocalFlags().isLocale()) {
            this.astBuilder.addWordBoundaryAssertion(this.lexer.getLocaleData().getWordCharacters(), this.lexer.getLocaleData().getNonWordCharacters());
        } else {
            this.astBuilder.addWordBoundaryAssertion(Constants.WORD_CHARS, Constants.NON_WORD_CHARS);
        }
    }

    /**
     * Handles a non-word-boundary token. Mirror image of {@link #parseWordBoundary}: a repeated
     * token adds nothing, a directly preceding word boundary yields a dead node, and otherwise the
     * assertion is added with the word/non-word sets selected by the unicode and locale flags.
     */
    private void parseNonWordBoundary(Token.Kind prevKind) {
        if (prevKind == Token.Kind.nonWordBoundary) {
            return;
        }
        if (prevKind == Token.Kind.wordBoundary) {
            this.astBuilder.replaceCurTermWithDeadNode();
        } else if (getLocalFlags().isUnicode(this.mode)) {
            this.astBuilder.addWordNonBoundaryAssertionPython(this.lexer.getPredefinedCharClass('w'), this.lexer.getPredefinedCharClass('W'));
        } else if (getLocalFlags().isLocale()) {
            this.astBuilder.addWordNonBoundaryAssertionPython(this.lexer.getLocaleData().getWordCharacters(), this.lexer.getLocaleData().getNonWordCharacters());
        } else {
            this.astBuilder.addWordNonBoundaryAssertionPython(Constants.WORD_CHARS, Constants.NON_WORD_CHARS);
        }
    }

    /**
     * Attaches a quantifier to the current term.
     *
     * @throws RegexSyntaxException if the previous token was also a quantifier, if there is no
     *             current term, or if the previous token kind is not in {@link #QUANTIFIER_PREV}
     */
    private void parseQuantifier(Token.Quantifier quantifier, Token.Kind prevKind) throws RegexSyntaxException {
        if (prevKind == Token.Kind.quantifier) {
            throw syntaxError(PyErrorMessages.MULTIPLE_REPEAT, RegexSyntaxException.ErrorCode.InvalidQuantifier);
        }
        // prevKind is null for the very first token; EnumSet.contains(null) is simply false.
        if (this.astBuilder.getCurTerm() == null || !QUANTIFIER_PREV.contains(prevKind)) {
            throw syntaxError("nothing to repeat", RegexSyntaxException.ErrorCode.InvalidQuantifier);
        }
        this.astBuilder.addQuantifier(quantifier);
    }

    /**
     * Starts a new alternative in the current group.
     *
     * @throws RegexSyntaxException if the current group is a conditional back-reference group that
     *             already has two alternatives
     */
    private void parseAlternation() throws RegexSyntaxException {
        if (this.astBuilder.getCurGroup().isConditionalBackReferenceGroup() && this.astBuilder.getCurGroup().getAlternatives().size() == 2) {
            throw syntaxError("conditional backref with more than two branches", RegexSyntaxException.ErrorCode.InvalidBackReference);
        }
        this.astBuilder.nextSequence();
    }

    /**
     * Closes the current group.
     *
     * @throws RegexSyntaxException if the current group's parent is the AST root, i.e. there is no
     *             open group left to close
     */
    private void parseGroupEnd(Token token) throws RegexSyntaxException {
        if (this.astBuilder.getCurGroup().getParent() instanceof RegexASTRootNode) {
            throw syntaxError("unbalanced parenthesis", RegexSyntaxException.ErrorCode.UnmatchedParenthesis);
        }
        if (this.astBuilder.getCurGroup().isLocalFlags()) {
            this.lexer.popLocalFlags();
        }
        // A conditional back-reference group with a single branch gets a second sequence added
        // before it is closed.
        if (this.astBuilder.getCurGroup().isConditionalBackReferenceGroup() && this.astBuilder.getCurGroup().getAlternatives().size() == 1) {
            this.astBuilder.nextSequence();
        }
        this.astBuilder.popGroup(token);
    }

    /**
     * Adds one member of a bracketed character class to the matching accumulator: members that are
     * themselves character classes, and all members when case-insensitive matching is off, go to
     * {@link #curCharClass}; everything else is queued for case folding.
     */
    private void parseCharClassAtom(Token.CharacterClassAtom atom) {
        ClassSetContents contents = atom.getContents();
        if (!this.lexer.featureEnabledIgnoreCase() || contents.isCharacterClass()) {
            this.curCharClass.addSet(contents.getCodePointSet());
        } else {
            this.curCharClassCaseClosure.addSet(contents.getCodePointSet());
        }
    }

    /**
     * Finishes a bracketed character class: applies case folding to the queued members, merges
     * them, inverts the set if the class was negated and hands the result to the AST builder.
     */
    private void parseCharClassEnd() {
        // Must be computed before case folding widens the accumulators.
        boolean wasSingleChar = !this.lexer.isCurCharClassInverted() &&
                        ((this.curCharClass.matchesSingleChar() && this.curCharClassCaseClosure.isEmpty()) ||
                                        (this.curCharClass.isEmpty() && this.curCharClassCaseClosure.matchesSingleChar()));
        if (this.lexer.featureEnabledIgnoreCase()) {
            this.lexer.caseFoldUnfold(this.curCharClassCaseClosure);
            this.curCharClass.addSet(this.curCharClassCaseClosure.get());
        }
        CodePointSet members = this.curCharClass.toCodePointSet();
        CodePointSet result = this.lexer.isCurCharClassInverted() ? members.createInverse(this.lexer.source.getEncoding()) : members;
        this.astBuilder.addCharClass(result, wasSingleChar);
    }

    /**
     * Handles an inline flags token. Global flags are added to the lexer's global flags; local
     * flags open a new group and are pushed onto the lexer's local flag stack.
     *
     * @param prevToken the token preceding {@code inlineFlags}, or {@code null} at pattern start
     * @throws RegexSyntaxException if global flags are preceded by anything other than further
     *             global inline flags
     */
    private void parseInlineFlags(Token.InlineFlags inlineFlags, Token prevToken) throws RegexSyntaxException {
        if (!inlineFlags.isGlobal()) {
            this.astBuilder.pushGroup(inlineFlags);
            this.lexer.pushLocalFlags((PythonFlags) inlineFlags.getFlags());
            return;
        }
        boolean atStart = prevToken == null || (prevToken.kind == Token.Kind.inlineFlags && ((Token.InlineFlags) prevToken).isGlobal());
        if (!atStart) {
            throw syntaxErrorAtAbs(PyErrorMessages.GLOBAL_FLAGS_NOT_AT_START, inlineFlags.getPosition(), RegexSyntaxException.ErrorCode.InvalidInlineFlag);
        }
        this.lexer.addGlobalFlags((PythonFlags) inlineFlags.getFlags());
    }

    /**
     * Adds a literal character to the AST. In case-insensitive mode the code point is expanded via
     * the lexer's case folding first, reusing {@link #curCharClass} as scratch space.
     */
    private void literalChar(int i) {
        if (!this.lexer.featureEnabledIgnoreCase()) {
            this.astBuilder.addCharClass(CodePointSet.create(i));
            return;
        }
        this.curCharClass.clear();
        this.curCharClass.addCodePoint(i);
        this.lexer.caseFoldUnfold(this.curCharClass);
        this.astBuilder.addCharClass(this.curCharClass.toCodePointSet(), true);
    }

    /**
     * Validates the target of a (conditional) back-reference at the point where it occurs.
     *
     * <p>Conditional back-references outside of a look-behind are not checked here. Inside a
     * look-behind they must refer to a group that has already been seen. Every reference that is
     * checked must neither target a group that is still open nor, per the look-behind walk below,
     * a group whose number is at or above the first capture group enclosed by a surrounding
     * look-behind.
     *
     * @throws RegexSyntaxException if one of the rules above is violated
     */
    private void verifyGroupReference(Token.BackReference backReference) throws RegexSyntaxException {
        boolean conditional = backReference.kind == Token.Kind.conditionalBackreference;
        assert backReference.getGroupNumbers().length == 1;
        int groupNumber = backReference.getGroupNumbers()[0];
        boolean insideLookBehind = insideLookBehind();
        if (conditional && insideLookBehind && groupNumber >= this.lexer.numberOfCaptureGroupsSoFar()) {
            throw syntaxErrorHere(PyErrorMessages.CANNOT_REFER_TO_AN_OPEN_GROUP, RegexSyntaxException.ErrorCode.InvalidBackReference);
        }
        if (conditional && !insideLookBehind) {
            return;
        }
        // The referenced group must not be one of the groups we are currently nested in.
        for (RegexASTNode node = this.astBuilder.getCurGroup(); node != null; node = node.getParent()) {
            if ((node instanceof Group) && ((Group) node).getGroupNumber() == groupNumber) {
                // NOTE(review): the +4 offset presumably skips the "(?P=" prefix of a named reference - confirm.
                int position = backReference.isNamedReference() ? backReference.getPosition() + 4 : backReference.getPosition();
                throw syntaxErrorAtAbs(PyErrorMessages.CANNOT_REFER_TO_AN_OPEN_GROUP, position, RegexSyntaxException.ErrorCode.InvalidBackReference);
            }
        }
        if (this.astBuilder.getCurGroup() == null) {
            return;
        }
        // The referenced group must not be defined inside any enclosing look-behind.
        for (RegexASTSubtreeRootNode subTree = this.astBuilder.getCurGroup().getSubTreeParent(); subTree != null; subTree = subTree.getSubTreeParent()) {
            if ((subTree instanceof LookBehindAssertion) && ((LookBehindAssertion) subTree).getGroup().getEnclosedCaptureGroupsLo() <= groupNumber) {
                throw syntaxErrorHere(PyErrorMessages.CANNOT_REFER_TO_GROUP_DEFINED_IN_THE_SAME_LOOKBEHIND_SUBPATTERN, RegexSyntaxException.ErrorCode.InvalidBackReference);
            }
        }
    }

    /** Returns {@code true} if any sub-tree root enclosing the current group is a look-behind. */
    private boolean insideLookBehind() {
        for (RegexASTSubtreeRootNode subTree = this.astBuilder.getCurGroup().getSubTreeParent(); subTree != null; subTree = subTree.getSubTreeParent()) {
            if (subTree.isLookBehindAssertion()) {
                return true;
            }
        }
        return false;
    }

    /** Creates a syntax error via the lexer's {@code syntaxError}. */
    private RegexSyntaxException syntaxError(String str, RegexSyntaxException.ErrorCode errorCode) {
        return this.lexer.syntaxError(str, errorCode);
    }

    /** Creates a syntax error via the lexer's {@code syntaxErrorHere}. */
    private RegexSyntaxException syntaxErrorHere(String str, RegexSyntaxException.ErrorCode errorCode) {
        return this.lexer.syntaxErrorHere(str, errorCode);
    }

    /** Creates a syntax error via the lexer's {@code syntaxErrorAtAbs}, passing position {@code i}. */
    private RegexSyntaxException syntaxErrorAtAbs(String str, int i, RegexSyntaxException.ErrorCode errorCode) {
        return this.lexer.syntaxErrorAtAbs(str, i, errorCode);
    }
}
