package org.tribuo.util.tokens.impl;

import com.oracle.labs.mlrg.olcut.config.Config;
import com.oracle.labs.mlrg.olcut.provenance.ConfiguredObjectProvenance;
import com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl;
import java.util.Arrays;
import org.tribuo.util.tokens.impl.SplitFunctionTokenizer;

/* loaded from: input_file:org/tribuo/util/tokens/impl/SplitCharactersTokenizer.class */
/**
 * A tokenizer whose split decisions are driven by two configurable character sets:
 * characters that always split, and characters that split unless they sit between
 * two digits. Whitespace always splits.
 *
 * <p>The actual decision logic lives in {@link SplitCharactersSplitterFunction}, which
 * is installed as this tokenizer's split function by {@link #postConfig()}.
 */
public class SplitCharactersTokenizer extends SplitFunctionTokenizer {
    /** Characters that always cause a split when the no-argument constructor is used. */
    public static final char[] DEFAULT_SPLIT_CHARACTERS = {'*', '(', ')', '&', '[', ']', '{', '}', '`', '\'', '|', ';', ':', '\\', '!', '-', '?'};

    /** Characters that cause a split, except between two digits, when the no-argument constructor is used. */
    public static final char[] DEFAULT_SPLIT_EXCEPTING_IN_DIGITS_CHARACTERS = {'.', ',', '/'};

    @Config(description = "The characters to split on.")
    private char[] splitCharacters;

    @Config(description = "The characters to split on unless we're in a number.")
    private char[] splitXDigitsCharacters;

    /**
     * Split function that splits on whitespace, on a set of "always split" characters,
     * and on a second set of characters unless they are surrounded by digits.
     *
     * <p>The arrays handed to the constructor are stored as given (not copied); either
     * may be {@code null}, in which case that set is treated as empty.
     */
    public static class SplitCharactersSplitterFunction implements SplitFunctionTokenizer.SplitFunction {
        private final char[] splitCharacters;
        private final char[] splitXDigitsCharacters;

        /**
         * Creates the split function.
         *
         * @param cArr  characters that always split; may be {@code null}
         * @param cArr2 characters that split unless between two digits; may be {@code null}
         */
        public SplitCharactersSplitterFunction(char[] cArr, char[] cArr2) {
            this.splitCharacters = cArr;
            this.splitXDigitsCharacters = cArr2;
        }

        /**
         * Decides whether the text should be split at the given character.
         *
         * @param i            the code point being examined
         * @param i2           the position of that character in {@code charSequence}; used to look at
         *                     the characters immediately before and after it
         * @param charSequence the text being tokenized
         * @return {@code SPLIT_AT} for whitespace and split characters, and for
         *         split-unless-in-digits characters that are not strictly between two digits;
         *         {@code NO_SPLIT_WORD} otherwise
         */
        @Override // org.tribuo.util.tokens.impl.SplitFunctionTokenizer.SplitFunction
        public SplitFunctionTokenizer.SplitResult apply(int i, int i2, CharSequence charSequence) {
            // The split sets are char[]s, so only BMP code points can ever be members.
            // Casting a supplementary code point to char would truncate it and could
            // falsely match a split character (e.g. U+1002A -> '*') or a space (U+10020 -> ' ').
            if (!Character.isBmpCodePoint(i)) {
                return Character.isWhitespace(i)
                        ? SplitFunctionTokenizer.SplitResult.SPLIT_AT
                        : SplitFunctionTokenizer.SplitResult.NO_SPLIT_WORD;
            }

            char current = (char) i;
            if (isSplitCharacter(current)) {
                return SplitFunctionTokenizer.SplitResult.SPLIT_AT;
            }
            if (!isSplitXDigitCharacter(current)) {
                return SplitFunctionTokenizer.SplitResult.NO_SPLIT_WORD;
            }

            // A split-unless-in-digits character is kept only when it has a digit on both
            // sides; at either end of the text there is no such neighbour, so it splits.
            boolean betweenDigits = i2 != 0
                    && i2 != charSequence.length() - 1
                    && Character.isDigit(charSequence.charAt(i2 - 1))
                    && Character.isDigit(charSequence.charAt(i2 + 1));
            return betweenDigits
                    ? SplitFunctionTokenizer.SplitResult.NO_SPLIT_WORD
                    : SplitFunctionTokenizer.SplitResult.SPLIT_AT;
        }

        /**
         * Tests whether the character always causes a split.
         *
         * @param c the character to test
         * @return {@code true} if {@code c} is whitespace or is in the split character set
         */
        public boolean isSplitCharacter(char c) {
            return SplitCharactersTokenizer.isCharacter(c, this.splitCharacters) || Character.isWhitespace(c);
        }

        /**
         * Tests whether the character is in the split-unless-in-digits set.
         *
         * @param c the character to test
         * @return {@code true} if {@code c} is in the split-unless-in-digits set
         */
        public boolean isSplitXDigitCharacter(char c) {
            return SplitCharactersTokenizer.isCharacter(c, this.splitXDigitsCharacters);
        }
    }

    /**
     * Creates a tokenizer using copies of {@link #DEFAULT_SPLIT_CHARACTERS} and
     * {@link #DEFAULT_SPLIT_EXCEPTING_IN_DIGITS_CHARACTERS}.
     */
    public SplitCharactersTokenizer() {
        this.splitCharacters = copyOrNull(DEFAULT_SPLIT_CHARACTERS);
        this.splitXDigitsCharacters = copyOrNull(DEFAULT_SPLIT_EXCEPTING_IN_DIGITS_CHARACTERS);
        postConfig();
    }

    /**
     * Builds the split function from the currently configured character sets and installs it.
     */
    public void postConfig() {
        this.splitFunction = new SplitCharactersSplitterFunction(this.splitCharacters, this.splitXDigitsCharacters);
    }

    /**
     * Creates a tokenizer with the supplied character sets. The arrays are copied, so later
     * changes to the caller's arrays do not affect this tokenizer.
     *
     * @param cArr  characters that always split; may be {@code null}
     * @param cArr2 characters that split unless between two digits; may be {@code null}
     */
    public SplitCharactersTokenizer(char[] cArr, char[] cArr2) {
        this.splitCharacters = copyOrNull(cArr);
        this.splitXDigitsCharacters = copyOrNull(cArr2);
        postConfig();
    }

    /**
     * Creates a tokenizer with both character sets empty, so that only whitespace splits.
     *
     * @return a tokenizer that splits on whitespace only
     */
    public static SplitCharactersTokenizer createWhitespaceTokenizer() {
        return new SplitCharactersTokenizer(new char[0], new char[0]);
    }

    /**
     * Returns the provenance of this tokenizer.
     *
     * @return a new {@code ConfiguredObjectProvenanceImpl} for this object, labelled "Tokenizer"
     */
    /* renamed from: getProvenance, reason: merged with bridge method [inline-methods] */
    public ConfiguredObjectProvenance m8getProvenance() {
        return new ConfiguredObjectProvenanceImpl(this, "Tokenizer");
    }

    /**
     * Tests whether the character always causes a split.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} is whitespace or is in the split character set
     * @deprecated the same check is available on {@link SplitCharactersSplitterFunction}
     */
    @Deprecated
    public boolean isSplitCharacter(char c) {
        return isCharacter(c, this.splitCharacters) || Character.isWhitespace(c);
    }

    /**
     * Tests whether the character is in the split-unless-in-digits set.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} is in the split-unless-in-digits set
     * @deprecated the same check is available on {@link SplitCharactersSplitterFunction}
     */
    @Deprecated
    public boolean isSplitXDigitCharacter(char c) {
        return isCharacter(c, this.splitXDigitsCharacters);
    }

    /**
     * Linear membership test of a character in an array.
     *
     * @param c    the character to look for
     * @param cArr the array to search; {@code null} is treated as empty
     * @return {@code true} if {@code cArr} contains {@code c}
     */
    /* JADX INFO: Access modifiers changed from: private */
    public static boolean isCharacter(char c, char[] cArr) {
        if (cArr == null) {
            return false;
        }
        for (char candidate : cArr) {
            if (candidate == c) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns a copy of the characters that always split.
     *
     * @return a fresh copy of the split character set
     * @throws NullPointerException if the split character set is {@code null}
     */
    @Deprecated
    public char[] getSplitCharacters() {
        return Arrays.copyOf(this.splitCharacters, this.splitCharacters.length);
    }

    /**
     * Returns a copy of the characters that split unless between two digits.
     *
     * @return a fresh copy of the split-unless-in-digits character set
     * @throws NullPointerException if the split-unless-in-digits character set is {@code null}
     */
    @Deprecated
    public char[] getSplitXDigitsCharacters() {
        return Arrays.copyOf(this.splitXDigitsCharacters, this.splitXDigitsCharacters.length);
    }

    /**
     * Creates a new tokenizer configured with the same character sets as this one.
     *
     * @return a new, independently configured tokenizer
     */
    @Override // org.tribuo.util.tokens.impl.SplitFunctionTokenizer
    /* renamed from: clone, reason: merged with bridge method [inline-methods] */
    public SplitCharactersTokenizer mo9clone() {
        return new SplitCharactersTokenizer(this.splitCharacters, this.splitXDigitsCharacters);
    }

    /** Returns a copy of {@code source}, or {@code null} when {@code source} is {@code null}. */
    private static char[] copyOrNull(char[] source) {
        return source == null ? null : Arrays.copyOf(source, source.length);
    }
}
