package org.elasticsearch.xpack.ml.job.categorization;

import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
import org.elasticsearch.action.admin.indices.analyze.TransportAnalyzeAction;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AnalysisRegistry;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig;
import org.elasticsearch.xpack.ml.filestructurefinder.FileStructureUtils;

/* loaded from: input_file:org/elasticsearch/xpack/ml/job/categorization/CategorizationAnalyzer.class */
/**
 * Splits field values into tokens for categorization using a Lucene {@link Analyzer}
 * that is derived from a {@link CategorizationAnalyzerConfig}.
 *
 * <p>The analyzer is either looked up by name in the {@link AnalysisRegistry} or assembled
 * here as a {@link CustomAnalyzer} from the configured char filters, tokenizer and token
 * filters. Only an analyzer assembled by this class is closed by {@link #close()}; one
 * obtained from the registry is left open.
 */
public class CategorizationAnalyzer implements Closeable {

    /** Analyzer used by {@link #tokenizeField(String, String)}. */
    private final Analyzer analyzer;

    /** {@code true} when {@link #analyzer} was built by this class and must be closed by it. */
    private final boolean closeAnalyzer;

    /**
     * Creates the analyzer described by the supplied configuration.
     *
     * @param analysisRegistry registry used to resolve analyzers, char filters, tokenizers and token filters
     * @param environment environment handed to the analysis component providers
     * @param categorizationAnalyzerConfig configuration describing the analyzer to use
     * @throws IOException if one of the analysis component providers fails
     * @throws IllegalArgumentException if a referenced analysis component cannot be found or created
     */
    public CategorizationAnalyzer(AnalysisRegistry analysisRegistry, Environment environment,
                                  CategorizationAnalyzerConfig categorizationAnalyzerConfig) throws IOException {
        Tuple<Analyzer, Boolean> analyzerAndOwnership = makeAnalyzer(categorizationAnalyzerConfig, analysisRegistry, environment);
        this.analyzer = analyzerAndOwnership.v1();
        this.closeAnalyzer = analyzerAndOwnership.v2();
    }

    /**
     * Closes the underlying analyzer, but only if it was built by this class rather than
     * obtained from the registry.
     */
    @Override
    public void close() {
        if (closeAnalyzer) {
            analyzer.close();
        }
    }

    /**
     * Runs the analyzer over a single field value and collects the resulting terms.
     * Empty terms are skipped.
     *
     * @param fieldName name of the field, passed to the analyzer and used in the error message
     * @param fieldValue text to tokenize
     * @return the non-empty tokens in the order the analyzer produced them
     * @throws ElasticsearchException if the token stream throws an {@link IOException}
     */
    public List<String> tokenizeField(String fieldName, String fieldValue) {
        List<String> tokens = new ArrayList<>();
        // try-with-resources guarantees the stream is closed even when reset(), incrementToken()
        // or end() throws; leaving it open would break the next tokenStream() call on a
        // reusable analyzer.
        try (TokenStream stream = analyzer.tokenStream(fieldName, fieldValue)) {
            stream.reset();
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            while (stream.incrementToken()) {
                String token = term.toString();
                if (!token.isEmpty()) {
                    tokens.add(token);
                }
            }
            stream.end();
        } catch (IOException e) {
            throw new ElasticsearchException("Failed to analyze value [" + fieldValue + "] of field [" + fieldName + "]", e);
        }
        return tokens;
    }

    /**
     * Verifies that an analyzer can be created from the supplied builder by building it once
     * and discarding the result.
     *
     * @param builder builder whose resulting configuration is checked
     * @param analysisRegistry registry used to resolve the analysis components
     * @param environment environment handed to the analysis component providers
     * @throws IOException if one of the analysis component providers fails
     * @throws IllegalArgumentException if a referenced analysis component cannot be found or created
     */
    public static void verifyConfigBuilder(CategorizationAnalyzerConfig.Builder builder, AnalysisRegistry analysisRegistry,
                                           Environment environment) throws IOException {
        Tuple<Analyzer, Boolean> analyzerAndOwnership = makeAnalyzer(builder.build(), analysisRegistry, environment);
        // Only an analyzer that was built here is ours to close.
        if (analyzerAndOwnership.v2()) {
            analyzerAndOwnership.v1().close();
        }
    }

    /**
     * Resolves or builds the analyzer for a configuration.
     *
     * @return the analyzer paired with a flag that is {@code false} for an analyzer obtained
     *         from the registry by name and {@code true} for a newly built {@link CustomAnalyzer}
     */
    private static Tuple<Analyzer, Boolean> makeAnalyzer(CategorizationAnalyzerConfig config, AnalysisRegistry analysisRegistry,
                                                         Environment environment) throws IOException {
        String analyzerName = config.getAnalyzer();
        if (analyzerName != null) {
            Analyzer globalAnalyzer = analysisRegistry.getAnalyzer(analyzerName);
            if (globalAnalyzer == null) {
                throw new IllegalArgumentException("Failed to find global analyzer [" + analyzerName + "]");
            }
            return new Tuple<>(globalAnalyzer, Boolean.FALSE);
        }

        List<CharFilterFactory> charFilters = parseCharFilterFactories(config, analysisRegistry, environment);
        Tuple<String, TokenizerFactory> tokenizer = parseTokenizerFactory(config, analysisRegistry, environment);
        List<TokenFilterFactory> tokenFilters =
            parseTokenFilterFactories(config, analysisRegistry, environment, tokenizer, charFilters);

        Analyzer customAnalyzer = new CustomAnalyzer(
            tokenizer.v1(),
            tokenizer.v2(),
            charFilters.toArray(new CharFilterFactory[0]),
            tokenFilters.toArray(new TokenFilterFactory[0]));
        return new Tuple<>(customAnalyzer, Boolean.TRUE);
    }

    /**
     * Creates a factory for every configured char filter. Each entry is either the name of a
     * char filter known to the registry or an inline definition that carries a type setting.
     */
    private static List<CharFilterFactory> parseCharFilterFactories(CategorizationAnalyzerConfig config,
                                                                    AnalysisRegistry analysisRegistry,
                                                                    Environment environment) throws IOException {
        List<CharFilterFactory> factories = new ArrayList<>();
        for (CategorizationAnalyzerConfig.NameOrDefinition charFilter : config.getCharFilters()) {
            CharFilterFactory factory;
            if (charFilter.name != null) {
                AnalysisModule.AnalysisProvider<CharFilterFactory> provider =
                    analysisRegistry.getCharFilterProvider(charFilter.name);
                if (provider == null) {
                    throw new IllegalArgumentException("Failed to find global char filter under [" + charFilter.name + "]");
                }
                factory = provider.get(environment, charFilter.name);
            } else {
                String type = charFilter.definition.get(FileStructureUtils.MAPPING_TYPE_SETTING);
                if (type == null) {
                    throw new IllegalArgumentException("Missing [type] setting for char filter: " + charFilter.definition);
                }
                AnalysisModule.AnalysisProvider<CharFilterFactory> provider = analysisRegistry.getCharFilterProvider(type);
                if (provider == null) {
                    throw new IllegalArgumentException("Failed to find global char filter under [" + type + "]");
                }
                Settings settings = augmentSettings(charFilter.definition);
                factory = provider.get(buildDummyIndexSettings(settings), environment, "_anonymous_charfilter", settings);
            }
            if (factory == null) {
                throw new IllegalArgumentException("Failed to find char filter [" + charFilter + "]");
            }
            factories.add(factory);
        }
        return factories;
    }

    /**
     * Creates the tokenizer factory for the configured tokenizer.
     *
     * @return the tokenizer name paired with its factory; an inline definition is given the
     *         name {@code _anonymous_tokenizer}
     */
    private static Tuple<String, TokenizerFactory> parseTokenizerFactory(CategorizationAnalyzerConfig config,
                                                                         AnalysisRegistry analysisRegistry,
                                                                         Environment environment) throws IOException {
        CategorizationAnalyzerConfig.NameOrDefinition tokenizer = config.getTokenizer();
        String name;
        TokenizerFactory factory;
        if (tokenizer.name != null) {
            name = tokenizer.name;
            AnalysisModule.AnalysisProvider<TokenizerFactory> provider = analysisRegistry.getTokenizerProvider(name);
            if (provider == null) {
                throw new IllegalArgumentException("Failed to find global tokenizer under [" + name + "]");
            }
            factory = provider.get(environment, name);
        } else {
            String type = tokenizer.definition.get(FileStructureUtils.MAPPING_TYPE_SETTING);
            if (type == null) {
                throw new IllegalArgumentException("Missing [type] setting for tokenizer: " + tokenizer.definition);
            }
            AnalysisModule.AnalysisProvider<TokenizerFactory> provider = analysisRegistry.getTokenizerProvider(type);
            if (provider == null) {
                throw new IllegalArgumentException("Failed to find global tokenizer under [" + type + "]");
            }
            Settings settings = augmentSettings(tokenizer.definition);
            name = "_anonymous_tokenizer";
            factory = provider.get(buildDummyIndexSettings(settings), environment, name, settings);
        }
        return new Tuple<>(name, factory);
    }

    /**
     * Creates a factory for every configured token filter. Inline definitions are additionally
     * converted to their chain-aware form, which is given the tokenizer, the char filters and
     * the token filters created so far.
     */
    private static List<TokenFilterFactory> parseTokenFilterFactories(CategorizationAnalyzerConfig config,
                                                                      AnalysisRegistry analysisRegistry,
                                                                      Environment environment,
                                                                      Tuple<String, TokenizerFactory> tokenizerFactory,
                                                                      List<CharFilterFactory> charFilterFactories)
        throws IOException {
        TransportAnalyzeAction.DeferredTokenFilterRegistry deferredRegistry =
            new TransportAnalyzeAction.DeferredTokenFilterRegistry(analysisRegistry, (IndexSettings) null);
        List<TokenFilterFactory> factories = new ArrayList<>();
        for (CategorizationAnalyzerConfig.NameOrDefinition tokenFilter : config.getTokenFilters()) {
            TokenFilterFactory factory;
            if (tokenFilter.name != null) {
                AnalysisModule.AnalysisProvider<TokenFilterFactory> provider =
                    analysisRegistry.getTokenFilterProvider(tokenFilter.name);
                if (provider == null) {
                    throw new IllegalArgumentException("Failed to find global token filter under [" + tokenFilter.name + "]");
                }
                factory = provider.get(environment, tokenFilter.name);
            } else {
                String type = tokenFilter.definition.get(FileStructureUtils.MAPPING_TYPE_SETTING);
                if (type == null) {
                    throw new IllegalArgumentException("Missing [type] setting for token filter: " + tokenFilter.definition);
                }
                AnalysisModule.AnalysisProvider<TokenFilterFactory> provider = analysisRegistry.getTokenFilterProvider(type);
                if (provider == null) {
                    throw new IllegalArgumentException("Failed to find global token filter under [" + type + "]");
                }
                Settings settings = augmentSettings(tokenFilter.definition);
                TokenFilterFactory anonymousFactory =
                    provider.get(buildDummyIndexSettings(settings), environment, "_anonymous_tokenfilter", settings);
                // The list passed here holds only the filters that precede this one in the chain.
                factory = anonymousFactory.getChainAwareTokenFilterFactory(
                    tokenizerFactory.v2(), charFilterFactories, factories, deferredRegistry);
            }
            if (factory == null) {
                throw new IllegalArgumentException("Failed to find or create token filter [" + tokenFilter + "]");
            }
            factories.add(factory);
        }
        return factories;
    }

    /**
     * Wraps the given settings in an {@link IndexSettings} for a placeholder index named
     * {@code _na_}, as required by the provider method that takes index settings.
     */
    private static IndexSettings buildDummyIndexSettings(Settings settings) {
        IndexMetaData metaData = IndexMetaData.builder("_na_").settings(settings).build();
        return new IndexSettings(metaData, Settings.EMPTY);
    }

    /**
     * Returns a copy of the given settings with the index-level entries added that are used
     * to build the placeholder index metadata: created version, replica count, shard count
     * and a random UUID.
     */
    private static Settings augmentSettings(Settings settings) {
        return Settings.builder()
            .put(settings)
            .put("index.version.created", Version.CURRENT)
            .put("index.number_of_replicas", 0)
            .put("index.number_of_shards", 1)
            .put("index.uuid", UUIDs.randomBase64UUID())
            .build();
    }
}
