/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.language;

import com.optimaize.langdetect.LanguageDetector;
import com.optimaize.langdetect.LanguageDetectorBuilder;
import com.optimaize.langdetect.ngram.NgramExtractor;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.LanguageProfile;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
import com.optimaize.langdetect.text.RemoveMinorityScriptsTextFilter;
import com.optimaize.langdetect.text.TextFilter;
import com.optimaize.langdetect.text.TextObjectFactory;
import com.optimaize.langdetect.text.TextObjectFactoryBuilder;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.jetbrains.annotations.Nullable;
import org.languagetool.DetectedLanguage;
import org.languagetool.Experimental;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.Languages;
import org.languagetool.language.CommonWords;
import org.languagetool.language.FastText;
import org.languagetool.language.NGramLangIdentifier;
import org.languagetool.language.UnicodeBasedLangIdentifier;
import org.languagetool.noop.NoopLanguage;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LanguageIdentifier {
    private static final Logger logger = LoggerFactory.getLogger(LanguageIdentifier.class);
    // Same value as the minimal confidence handed to the language detector builder in the constructor.
    private static final double MINIMAL_CONFIDENCE = 0.9;
    // Same value as the short-text threshold handed to the detector builder and to the ngram identifier.
    private static final int SHORT_ALGO_THRESHOLD = 50;
    // Same value as the text length below which detectLanguage() keeps only scores of preferred languages.
    private static final int CONSIDER_ONLY_PREFERRED_THRESHOLD = 50;
    // Language codes for which getLanguageCodes() never requests a detection profile.
    private static final List<String> ignoreLangCodes = Arrays.asList("ast", "gl");
    // Language codes skipped by getLanguageCodes(); loadProfiles() reads their profiles from
    // "/<code>/<code>.profile" via the data broker instead, if that resource exists.
    private static final List<String> externalLangCodes = Arrays.asList("eo");
    // Same value as the fastText score below which detectLanguage() also consults common-word counts.
    private static final float THRESHOLD = 0.9f;
    // Detector and text factory built once in the constructor; used as the fallback when neither
    // fastText nor ngram data is available (or fastText failed).
    private final LanguageDetector languageDetector;
    private final TextObjectFactory textObjectFactory;
    // Maximum number of chars cleanAndShortenText() keeps; validated to be >= 10 in the constructor.
    private final int maxLength;
    // Supplies the dominant language codes for a text, see detectLanguage().
    private final UnicodeBasedLangIdentifier unicodeIdentifier = new UnicodeBasedLangIdentifier();
    // Optional back ends: null until enableFasttext() / enableNgrams() is called;
    // fastText is reset to null by detectLanguage() when it gets disabled after an error.
    private FastText fastText;
    private NGramLangIdentifier ngram;

    /**
     * Creates an identifier with a {@code maxLength} of 1000 characters.
     */
    public LanguageIdentifier() {
        this(1000);
    }

    /**
     * Creates an identifier and eagerly builds the built-in language detector
     * and the text object factory with its chain of text filters.
     *
     * @param maxLength maximum number of characters kept by {@link #cleanAndShortenText(String)};
     *                  must be at least 10
     * @throws IllegalArgumentException if {@code maxLength} is smaller than 10
     * @throws RuntimeException if the language profiles cannot be read
     */
    public LanguageIdentifier(int maxLength) {
        if (maxLength < 10) {
            throw new IllegalArgumentException("maxLength must be >= 10 (but values > 100 are recommended): " + maxLength);
        }
        this.maxLength = maxLength;
        try {
            List<LanguageProfile> profiles = loadProfiles(getLanguageCodes());
            this.languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                    .minimalConfidence(0.9)
                    .shortTextAlgorithm(50)
                    .withProfiles(profiles)
                    .build();
            // The filters are registered in this exact order.
            this.textObjectFactory = new TextObjectFactoryBuilder()
                    .maxTextLength(10000)
                    .withTextFilter(ImprovedUrlTextFilter.getInstance())
                    .withTextFilter(RemoveMinorityScriptsTextFilter.forThreshold(0.3))
                    .withTextFilter(new RemoveEMailSignatureFilter())
                    .withTextFilter(new RemoveMentionFilter())
                    .withTextFilter(new RemoveNonBreakingSpaces())
                    .build();
        } catch (IOException e) {
            throw new RuntimeException("Could not set up language identifier", e);
        }
    }

    /**
     * Switches on fastText-based detection. Does nothing if either argument is {@code null}.
     *
     * @param fasttextBinary the fastText executable, may be {@code null}
     * @param fasttextModel the fastText model file, may be {@code null}
     * @throws RuntimeException if creating the {@link FastText} instance fails with an {@link IOException}
     */
    public void enableFasttext(File fasttextBinary, File fasttextModel) {
        if (fasttextBinary == null || fasttextModel == null) {
            return;
        }
        try {
            fastText = new FastText(fasttextModel, fasttextBinary);
            logger.info("Started fasttext process for language identification: Binary " + fasttextBinary + " with model @ " + fasttextModel);
        } catch (IOException e) {
            throw new RuntimeException("Could not start fasttext process for language identification @ " + fasttextBinary + " with model @ " + fasttextModel, e);
        }
    }

    /**
     * @return {@code true} if a {@link FastText} instance is currently set, i.e. it was enabled
     *         and has not been reset to {@code null} after an error
     */
    public boolean isFastTextEnabled() {
        return fastText != null;
    }

    /**
     * Switches on ngram-based detection by loading data from the given location.
     *
     * @param ngramDir location of the ngram data, passed on to {@link NGramLangIdentifier}
     * @throws RuntimeException if loading fails with an {@link IOException}
     */
    public void enableNgrams(File ngramDir) {
        logger.info("Loading ngram data for language identification from " + ngramDir + "...");
        try {
            ngram = new NGramLangIdentifier(ngramDir, 50);
        } catch (IOException e) {
            throw new RuntimeException("Could not load ngram data language identification from " + ngramDir, e);
        }
        logger.info("Loaded ngram data for language identification from " + ngramDir);
    }

    /**
     * Collects the codes for which built-in detection profiles are requested:
     * the short codes of all non-variant languages, except ignored and external ones.
     * "zh" is replaced by "zh-CN" and "zh-TW".
     *
     * @return the language codes in the order the languages are returned by {@link Languages#get()}
     */
    private static List<String> getLanguageCodes() {
        List<String> codes = new ArrayList<>();
        for (Language language : Languages.get()) {
            String code = language.getShortCode();
            boolean wanted = !language.isVariant()
                    && !ignoreLangCodes.contains(code)
                    && !externalLangCodes.contains(code);
            if (!wanted) {
                continue;
            }
            if ("zh".equals(code)) {
                codes.add("zh-CN");
                codes.add("zh-TW");
            } else if (!codes.contains(code)) {
                codes.add(code);
            }
        }
        return codes;
    }

    /**
     * Reads the detection profiles for the given codes and appends the profiles of the
     * external languages, which are read from {@code /<code>/<code>.profile} via the
     * data broker. External profiles whose resource does not exist are skipped.
     *
     * @param langCodes codes of the profiles to read with the {@link LanguageProfileReader}
     * @return a mutable list of all loaded profiles
     * @throws IOException if a profile cannot be read or its stream cannot be closed
     */
    private List<LanguageProfile> loadProfiles(List<String> langCodes) throws IOException {
        LanguageProfileReader profileReader = new LanguageProfileReader();
        // Copy, so that appending below does not depend on the reader returning a mutable list.
        List<LanguageProfile> profiles = new ArrayList<>(profileReader.read(langCodes));
        for (String externalLangCode : externalLangCodes) {
            String profilePath = "/" + externalLangCode + "/" + externalLangCode + ".profile";
            if (!JLanguageTool.getDataBroker().resourceExists(profilePath)) {
                continue;
            }
            // try-with-resources closes the stream and lets a read failure propagate;
            // a 'continue' inside a hand-written finally block would silently drop that exception.
            try (InputStream profile = JLanguageTool.getDataBroker().getFromResourceDirAsStream(profilePath)) {
                profiles.add(new LanguageProfileReader().read(profile));
            }
        }
        return profiles;
    }

    /**
     * Cuts the text to at most {@code maxLength} chars and replaces runs of U+FEFF by a space.
     * If fastText or ngram detection is enabled, URLs, e-mail addresses, the e-mail signature,
     * mentions and non-breaking spaces are filtered as well.
     *
     * @param text the text to prepare, must not be {@code null}
     * @return the shortened and cleaned text
     */
    public String cleanAndShortenText(String text) {
        String shortText = text;
        if (text.length() > maxLength) {
            int end = maxLength;
            // Don't cut a surrogate pair in half: a lone high surrogate is not a valid character.
            if (Character.isHighSurrogate(text.charAt(end - 1)) && Character.isLowSurrogate(text.charAt(end))) {
                end--;
            }
            shortText = text.substring(0, end);
        }
        shortText = shortText.replaceAll("\ufeff+", " ");
        if (fastText != null || ngram != null) {
            // Same filters, in the same relative order, as registered on the text object factory
            // (minus the minority-script filter).
            shortText = ImprovedUrlTextFilter.getInstance().filter(shortText);
            shortText = new RemoveEMailSignatureFilter().filter(shortText);
            shortText = new RemoveMentionFilter().filter(shortText);
            shortText = new RemoveNonBreakingSpaces().filter(shortText);
        }
        return shortText;
    }

    /**
     * Detects the language of the text without any no-op or preferred languages.
     *
     * @param cleanText the text to analyze
     * @return the detected language, or {@code null} if none could be detected
     */
    @Nullable
    public Language detectLanguage(String cleanText) {
        DetectedLanguage detected = detectLanguage(cleanText, Collections.emptyList(), Collections.emptyList());
        return detected == null ? null : detected.getDetectedLanguage();
    }

    /**
     * Like {@link #detectLanguage(String)}, but returns the full {@link DetectedLanguage} result.
     *
     * @param cleanText the text to analyze
     * @return the detection result, or {@code null} if no language could be detected
     */
    @Nullable
    @Experimental
    DetectedLanguage detectLanguageWithDetails(String cleanText) {
        List<String> noLangs = Collections.emptyList();
        return detectLanguage(cleanText, noLangs, noLangs);
    }

    /**
     * Detects the language of the given text.
     * <p>
     * If fastText or ngram data is enabled, one of them scores the text: ngram for texts of up
     * to 50 chars (or whenever fastText is not available), fastText otherwise. Low-confidence
     * fastText results and "zz" results are re-scored with common-word counts. Without those
     * back ends, or if fastText failed, the built-in language detector is used as a fallback.
     *
     * @param cleanText the text to analyze (presumably the output of
     *                  {@link #cleanAndShortenText(String)} - verify against callers)
     * @param noopLangsTmp additional language codes that may be detected even though they are
     *                     not supported languages; "nb" is treated as "no"
     * @param preferredLangsTmp language codes without variants; for texts shorter than 50 chars
     *                          only these (plus the dominant codes of the text's script, see
     *                          below) are considered; "nb" is treated as "no"
     * @return the detected language, or {@code null} if there is no result or the resulting
     *         code is neither supported nor one of the additional codes
     * @throws NullPointerException if one of the lists is {@code null}
     * @throws IllegalArgumentException if a preferred language code contains a "-"
     */
    @Nullable
    public DetectedLanguage detectLanguage(String cleanText, List<String> noopLangsTmp, List<String> preferredLangsTmp) {
        Objects.requireNonNull(noopLangsTmp);
        Objects.requireNonNull(preferredLangsTmp);
        // Both lists are extended below, so collect into lists that are guaranteed to be mutable.
        List<String> additionalLangs = noopLangsTmp.stream()
                .map(k -> k.equals("nb") ? "no" : k)
                .collect(Collectors.toCollection(ArrayList::new));
        List<String> preferredLangs = preferredLangsTmp.stream()
                .map(k -> k.equals("nb") ? "no" : k)
                .collect(Collectors.toCollection(ArrayList::new));
        if (preferredLangs.stream().anyMatch(k -> k.contains("-"))) {
            throw new IllegalArgumentException("preferredLanguages may only contain language codes without variants (e.g. 'en', but not 'en-US'): " + preferredLangs + ". Use 'preferredVariants' to specify variants.");
        }
        List<String> domLangCodes = this.unicodeIdentifier.getDominantLangCodes(cleanText);
        String domLangStr = String.join(",", domLangCodes);
        // For these dominant codes a no-op language is returned right away.
        if (domLangStr.equals("th") || domLangStr.equals("he") || domLangStr.equals("ko") || domLangStr.equals("hi,mr")) {
            return new DetectedLanguage(null, new NoopLanguage());
        }
        boolean prefersUnicodeDetectedLang = preferredLangs.contains("ru") || preferredLangs.contains("uk")
                || preferredLangs.contains("be") || preferredLangs.contains("zh")
                || preferredLangs.contains("hi") || preferredLangs.contains("mr");
        if (!prefersUnicodeDetectedLang) {
            preferredLangs.addAll(domLangCodes);
            additionalLangs.addAll(domLangCodes);
        }
        Map.Entry<String, Double> result = null;
        boolean fasttextFailed = false;
        if (this.fastText != null || this.ngram != null) {
            try {
                Map<String, Double> scores;
                boolean usingFastText = false;
                if ((cleanText.length() <= 50 || this.fastText == null) && this.ngram != null) {
                    scores = this.ngram.detectLanguages(cleanText.trim(), additionalLangs);
                } else {
                    usingFastText = true;
                    scores = this.fastText.runFasttext(cleanText, additionalLangs);
                }
                result = this.getHighestScoringResult(scores);
                // "zz".equals(...) instead of getKey().equals("zz"): the key is null when there are
                // no scores, and the resulting NPE would end up disabling fastText in the catch below.
                if ((usingFastText && result.getValue().floatValue() < 0.9f) || "zz".equals(result.getKey())) {
                    // Add the number of known common words per language on top of the scores.
                    CommonWords commonWords = new CommonWords();
                    Map<Language, Integer> lang2Count = commonWords.getKnownWordsPerLanguage(cleanText);
                    HashSet<String> baseLangAlreadyHandled = new HashSet<String>();
                    for (Map.Entry<Language, Integer> entry : lang2Count.entrySet()) {
                        String langCode = entry.getKey().getShortCode();
                        if (!baseLangAlreadyHandled.add(langCode)) {
                            // only the first entry per short code counts
                            continue;
                        }
                        scores.merge(langCode, (double) entry.getValue(), Double::sum);
                    }
                    result = this.getHighestScoringResult(scores);
                }
                if (preferredLangs.contains("no") && !preferredLangs.contains("da")) {
                    // Norwegian is preferred but Danish is not: never answer with Danish.
                    scores.keySet().removeIf(k -> k.equals("da"));
                    result = this.getHighestScoringResult(scores);
                }
                if (cleanText.length() < 50 && preferredLangs.size() > 0) {
                    // Short text: consider preferred languages only.
                    scores.keySet().removeIf(k -> !preferredLangs.contains(k));
                    result = this.getHighestScoringResult(scores);
                }
                // The reported score depends on the text length only: 0.99 for 30 or more chars,
                // proportionally less for shorter texts.
                double newScore = 0.99 / (30.0 / (double) Math.min(cleanText.length(), 30));
                result = new AbstractMap.SimpleImmutableEntry<String, Double>(result.getKey(), newScore);
            } catch (FastText.FastTextException e) {
                if (e.isDisabled()) {
                    this.fastText = null;
                    logger.error("Fasttext disabled", e);
                } else {
                    logger.error("Fasttext failed, fallback used", e);
                    fasttextFailed = true;
                }
            } catch (Exception e) {
                // NOTE(review): this also triggers when the ngram identifier threw; fastText gets
                // disabled regardless - confirm that this is intended.
                this.fastText = null;
                logger.error("Fasttext disabled", e);
            }
        }
        if ((this.fastText == null && this.ngram == null) || fasttextFailed) {
            // Fallback: the built-in detector, on text run through the text object factory's filters.
            cleanText = this.textObjectFactory.forText(cleanText).toString();
            result = this.detectLanguageCode(cleanText);
            if (additionalLangs.size() > 0) {
                logger.warn("Cannot consider noopLanguages because not in fastText mode: " + additionalLangs);
            }
        }
        if (result != null && result.getKey() != null && LanguageIdentifier.canLanguageBeDetected(result.getKey(), additionalLangs)) {
            return new DetectedLanguage(null, Languages.getLanguageForShortCode(result.getKey(), additionalLangs), result.getValue().floatValue());
        }
        return null;
    }

    /**
     * Tells whether a detection result with the given code can be reported.
     *
     * @param langCode the language code to check
     * @param additionalLanguageCodes codes that are accepted in addition to the supported languages
     * @return {@code true} if the code is a supported language or one of the additional codes
     */
    static boolean canLanguageBeDetected(String langCode, List<String> additionalLanguageCodes) {
        if (Languages.isLanguageSupported(langCode)) {
            return true;
        }
        return additionalLanguageCodes.contains(langCode);
    }

    /**
     * Finds the entry with the highest score.
     *
     * @param probs scores per language code
     * @return an immutable entry of the best code and its score; the key is {@code null} and
     *         the value is -1.0 if the map is empty or no score is greater than -1.0
     */
    private Map.Entry<String, Double> getHighestScoringResult(Map<String, Double> probs) {
        String bestCode = null;
        double bestScore = -1.0;
        for (Map.Entry<String, Double> entry : probs.entrySet()) {
            double score = entry.getValue();
            // strictly greater: on a tie, the entry seen first wins
            if (score > bestScore) {
                bestScore = score;
                bestCode = entry.getKey();
            }
        }
        return new AbstractMap.SimpleImmutableEntry<String, Double>(bestCode, bestScore);
    }

    /**
     * Runs the built-in language detector on the text.
     *
     * @param text the text to analyze
     * @return the language code and probability of the first result the detector returns,
     *         or {@code null} if it returns no result
     */
    @Nullable
    private Map.Entry<String, Double> detectLanguageCode(String text) {
        List<com.optimaize.langdetect.DetectedLanguage> candidates = languageDetector.getProbabilities(text);
        if (candidates.isEmpty()) {
            return null;
        }
        com.optimaize.langdetect.DetectedLanguage first = candidates.get(0);
        String code = first.getLocale().getLanguage();
        double probability = first.getProbability();
        return new AbstractMap.SimpleImmutableEntry<String, Double>(code, probability);
    }

    static class ImprovedUrlTextFilter
    implements TextFilter {
        private static final Pattern URL_REGEX = Pattern.compile("https?://[-_.?&~;+=/#%0-9A-Za-z]+");
        private static final Pattern MAIL_REGEX = Pattern.compile("[-_.0-9A-Za-z]+@[-_0-9A-Za-z]+[-_.0-9A-Za-z]+");
        private static final ImprovedUrlTextFilter INSTANCE = new ImprovedUrlTextFilter();

        ImprovedUrlTextFilter() {
        }

        static ImprovedUrlTextFilter getInstance() {
            return INSTANCE;
        }

        public String filter(CharSequence text) {
            String modified = URL_REGEX.matcher(text).replaceAll(" ");
            return MAIL_REGEX.matcher(modified).replaceAll(" ");
        }
    }

    static class RemoveNonBreakingSpaces
    implements TextFilter {
        RemoveNonBreakingSpaces() {
        }

        public String filter(CharSequence text) {
            return text.toString().replace('\u00a0', ' ');
        }
    }

    static class RemoveMentionFilter
    implements TextFilter {
        private static final Pattern MENTION = Pattern.compile("@[A-Za-z0-9_]+");

        RemoveMentionFilter() {
        }

        public String filter(CharSequence text) {
            return MENTION.matcher(text.toString()).replaceFirst("");
        }
    }

    static class RemoveEMailSignatureFilter
    implements TextFilter {
        private static final Pattern SIGNATURE = Pattern.compile("\n-- \n.*", 32);

        RemoveEMailSignatureFilter() {
        }

        public String filter(CharSequence text) {
            return SIGNATURE.matcher(text.toString()).replaceFirst("");
        }
    }
}

