/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.hanlp.corpus.document;

import com.hankcs.hanlp.corpus.document.sentence.Sentence;
import com.hankcs.hanlp.corpus.document.sentence.word.CompoundWord;
import com.hankcs.hanlp.corpus.document.sentence.word.IWord;
import com.hankcs.hanlp.corpus.document.sentence.word.Word;
import com.hankcs.hanlp.corpus.io.IOUtil;
import com.hankcs.hanlp.utility.Predefine;
import java.io.File;
import java.io.Serializable;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A document, represented as an ordered list of {@link Sentence}s.
 *
 * <p>Instances are built either from a single string ({@link #create(String)}) or from a
 * file read line by line ({@link #create(File)}). The remaining methods expose different
 * views of the words contained in the sentences.
 */
public class Document implements Serializable {
    /** The sentences of this document, in document order. */
    public List<Sentence> sentenceList;

    /**
     * Holds the compiled sentence-boundary pattern so it is compiled once instead of on
     * every {@link Document#create(String)} call.
     *
     * <p>The constant deliberately lives in a nested class rather than in {@code Document}
     * itself: {@code Document} is {@link Serializable} without an explicit
     * {@code serialVersionUID}, and giving it a static initializer would change the default
     * computed UID and break compatibility with previously serialized instances.
     */
    private static final class SentenceBoundary {
        /**
         * Lazily matches the shortest run of characters that ends with one of:
         * U+3002 followed by "/w", U+FF01 followed by "/w ", U+FF1F followed by "/w ",
         * a line feed, or the end of the input.
         *
         * NOTE(review): the first alternative has no trailing space while the next two do;
         * this is reproduced exactly from the original pattern - confirm it is intentional.
         */
        static final Pattern PATTERN = Pattern.compile(".+?((\u3002/w)|(\uff01/w )|(\uff1f/w )|\\n|$)");

        private SentenceBoundary() {
        }
    }

    /**
     * Creates a document that wraps the given sentence list.
     *
     * @param sentenceList the sentences of the document; stored as-is, not copied
     */
    public Document(List<Sentence> sentenceList) {
        this.sentenceList = sentenceList;
    }

    /**
     * Builds a document from a string by cutting it into sentence-sized pieces and parsing
     * each piece with {@link Sentence#create(String)}.
     *
     * @param param the text to parse
     * @return the parsed document, or {@code null} if any piece could not be turned into a
     *         sentence (a warning is logged for the offending piece); an input without any
     *         match yields a document with an empty sentence list
     * @throws NullPointerException if {@code param} is {@code null}
     */
    public static Document create(String param) {
        Matcher matcher = SentenceBoundary.PATTERN.matcher(param);
        LinkedList<Sentence> sentenceList = new LinkedList<Sentence>();
        while (matcher.find()) {
            String single = matcher.group();
            Sentence sentence = Sentence.create(single);
            if (sentence == null) {
                // One bad piece invalidates the whole document.
                Predefine.logger.warning("\u4f7f\u7528" + single + "\u6784\u5efa\u53e5\u5b50\u5931\u8d25");
                return null;
            }
            sentenceList.add(sentence);
        }
        return new Document(sentenceList);
    }

    /**
     * Returns all words of the document as one flat list, in sentence order. Compound
     * words are kept as single elements.
     *
     * @return a new list containing every element of every sentence's word list
     */
    public List<IWord> getWordList() {
        LinkedList<IWord> wordList = new LinkedList<IWord>();
        for (Sentence sentence : this.sentenceList) {
            wordList.addAll(sentence.wordList);
        }
        return wordList;
    }

    /**
     * Returns all words of the document as one flat list of simple words; every compound
     * word is replaced by its inner words.
     *
     * @return a new flat list of simple words
     */
    public List<Word> getSimpleWordList() {
        LinkedList<Word> simpleWordList = new LinkedList<Word>();
        for (IWord word : this.getWordList()) {
            appendSimpleWords(word, true, simpleWordList);
        }
        return simpleWordList;
    }

    /**
     * Returns the document as a list of sentences, each a list of simple words; every
     * compound word is replaced by its inner words.
     *
     * @return a new list with one new word list per sentence
     */
    public List<List<Word>> getSimpleSentenceList() {
        LinkedList<List<Word>> simpleList = new LinkedList<List<Word>>();
        for (Sentence sentence : this.sentenceList) {
            LinkedList<Word> wordList = new LinkedList<Word>();
            for (IWord word : sentence.wordList) {
                appendSimpleWords(word, true, wordList);
            }
            simpleList.add(wordList);
        }
        return simpleList;
    }

    /**
     * Returns the word list of every sentence without any conversion.
     *
     * @return a new outer list whose elements are the sentences' own word lists (the inner
     *         lists are shared with the sentences, not copied)
     */
    public List<List<IWord>> getComplexSentenceList() {
        LinkedList<List<IWord>> complexList = new LinkedList<List<IWord>>();
        for (Sentence sentence : this.sentenceList) {
            complexList.add(sentence.wordList);
        }
        return complexList;
    }

    /**
     * Returns the document as a list of sentences, each a list of simple words.
     *
     * @param spilt if {@code true}, each compound word is replaced by its inner words; if
     *              {@code false}, each compound word is converted to a single word via
     *              {@code CompoundWord.toWord()}
     * @return a new list with one new word list per sentence
     */
    public List<List<Word>> getSimpleSentenceList(boolean spilt) {
        LinkedList<List<Word>> simpleList = new LinkedList<List<Word>>();
        for (Sentence sentence : this.sentenceList) {
            LinkedList<Word> wordList = new LinkedList<Word>();
            for (IWord word : sentence.wordList) {
                appendSimpleWords(word, spilt, wordList);
            }
            simpleList.add(wordList);
        }
        return simpleList;
    }

    /**
     * Returns the document as a list of sentences, each a list of simple words, splitting
     * only the compound words whose label is contained in {@code labelSet}. Other compound
     * words are converted to a single word via {@code CompoundWord.toWord()}.
     *
     * @param labelSet labels of the compound words that should be replaced by their inner
     *                 words
     * @return a new list with one new word list per sentence
     */
    public List<List<Word>> getSimpleSentenceList(Set<String> labelSet) {
        LinkedList<List<Word>> simpleList = new LinkedList<List<Word>>();
        for (Sentence sentence : this.sentenceList) {
            LinkedList<Word> wordList = new LinkedList<Word>();
            for (IWord word : sentence.wordList) {
                // The label set is consulted for compound words only, as before.
                boolean split = word instanceof CompoundWord && labelSet.contains(word.getLabel());
                appendSimpleWords(word, split, wordList);
            }
            simpleList.add(wordList);
        }
        return simpleList;
    }

    /**
     * Returns the string forms of all sentences joined by a single space.
     *
     * @return the joined sentences, or an empty string if there are no sentences
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        for (Sentence sentence : this.sentenceList) {
            sb.append(sentence);
            sb.append(' ');
        }
        if (sb.length() > 0) {
            // Drop the separator appended after the last sentence.
            sb.deleteCharAt(sb.length() - 1);
        }
        return sb.toString();
    }

    /**
     * Builds a document from a file, treating every non-blank line as one sentence.
     *
     * @param file the file to read
     * @return the parsed document, or {@code null} if any line could not be turned into a
     *         sentence (a warning is logged for the offending line)
     */
    public static Document create(File file) {
        // NOTE(review): the iterator is never explicitly closed here, including on the
        // early return below - confirm whether IOUtil.LineIterator needs closing.
        IOUtil.LineIterator lineIterator = new IOUtil.LineIterator(file.getAbsolutePath());
        LinkedList<Sentence> sentenceList = new LinkedList<Sentence>();
        for (String line : lineIterator) {
            line = line.trim();
            if (line.isEmpty()) {
                continue;
            }
            Sentence sentence = Sentence.create(line);
            if (sentence == null) {
                // One bad line invalidates the whole document.
                Predefine.logger.warning("\u4f7f\u7528 " + line + " \u521b\u5efa\u53e5\u5b50\u5931\u8d25");
                return null;
            }
            sentenceList.add(sentence);
        }
        return new Document(sentenceList);
    }

    /**
     * Appends the simple-word form of {@code word} to {@code target}: a compound word
     * contributes either all of its inner words ({@code split == true}) or its single-word
     * conversion ({@code split == false}); any other word is appended as-is.
     */
    private static void appendSimpleWords(IWord word, boolean split, List<Word> target) {
        if (!(word instanceof CompoundWord)) {
            target.add((Word) word);
            return;
        }
        CompoundWord compound = (CompoundWord) word;
        if (split) {
            target.addAll(compound.innerList);
        } else {
            target.add(compound.toWord());
        }
    }
}

