/*
 * Decompiled with CFR 0.152.
 */
package marytts.modules;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import marytts.datatypes.MaryData;
import marytts.datatypes.MaryDataType;
import marytts.exceptions.MaryConfigurationException;
import marytts.fst.FSTLookup;
import marytts.modules.InternalModule;
import marytts.modules.phonemiser.AllophoneSet;
import marytts.modules.phonemiser.TrainedLTS;
import marytts.server.MaryProperties;
import marytts.util.MaryRuntimeUtils;
import marytts.util.MaryUtils;
import marytts.util.dom.MaryDomUtils;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.traversal.NodeIterator;

/**
 * Module that adds a phonetic transcription to the {@code t} (token) elements
 * of a document.
 *
 * <p>For every token part, a transcription is looked up in this order: the
 * optional user dictionary, the lexicon, both of them again with the
 * Unicode-normalised spelling, and finally the trained letter-to-sound rules.
 * The result is stored in the token's {@code ph} attribute and the source of
 * the first part's transcription in its {@code g2p_method} attribute.
 *
 * <p>The part-of-speech patterns used by {@link #isPosPunctuation(String)} and
 * {@link #isUnpronounceable(String)} are only initialised in
 * {@link #startup()}; calling those methods (or {@link #process(MaryData)})
 * with a non-null POS before startup results in a {@code NullPointerException}.
 */
public class JPhonemiser
extends InternalModule {
    /**
     * Matches a text that contains at least one word character. Compiled once
     * instead of on every token; used with {@code matches()}, so the whole
     * text has to fit the pattern.
     */
    private static final Pattern CONTAINS_WORD_CHAR = Pattern.compile(".*\\w.*");

    /** User dictionary: grapheme form to list of {@code "phones"} or {@code "phones|pos"} entries; null if none was loaded. */
    protected Map<String, List<String>> userdict;
    /** Lexicon lookup backed by the stream named by the lexicon property. */
    protected FSTLookup lexicon;
    /** Letter-to-sound rules used when no dictionary has an entry. */
    protected TrainedLTS lts;
    /** Flag handed to the {@link TrainedLTS} constructor; defaults to true. */
    protected boolean removeTrailingOneFromPhones = true;
    /** Allophone set used to validate user dictionary entries and to build the LTS. */
    protected AllophoneSet allophoneSet;
    /** POS pattern for punctuation; set by {@link #setPunctuationPosRegex()}. */
    protected Pattern punctuationPosRegex;
    /** POS pattern for unpronounceable tokens; set by {@link #setUnpronounceablePosRegex()}. */
    protected Pattern unpronounceablePosRegex;

    /**
     * Creates a phonemiser named "JPhonemiser" that converts
     * {@code PARTSOFSPEECH} data into {@code PHONEMES} data. The property
     * names are built by appending {@code allophoneset}, {@code userdict},
     * {@code lexicon}, {@code lettertosound} and
     * {@code removeTrailingOneFromPhones} to the given prefix.
     *
     * @param propertyPrefix prefix of the configuration property names
     * @throws IOException if reading one of the resources fails
     * @throws MaryConfigurationException if the configuration is invalid
     */
    public JPhonemiser(String propertyPrefix) throws IOException, MaryConfigurationException {
        this("JPhonemiser", MaryDataType.PARTSOFSPEECH, MaryDataType.PHONEMES, propertyPrefix + "allophoneset", propertyPrefix + "userdict", propertyPrefix + "lexicon", propertyPrefix + "lettertosound", propertyPrefix + "removeTrailingOneFromPhones");
    }

    /**
     * Same as the eight-argument constructor, without a
     * "remove trailing one from phones" property (the default {@code true} is
     * kept).
     *
     * @param componentName name of this module
     * @param inputType data type accepted as input
     * @param outputType data type produced as output
     * @param allophonesProperty property naming the allophone set
     * @param userdictProperty property naming the user dictionary file
     * @param lexiconProperty property naming the lexicon stream
     * @param ltsProperty property naming the letter-to-sound stream
     * @throws IOException if reading one of the resources fails
     * @throws MaryConfigurationException if the configuration is invalid
     */
    public JPhonemiser(String componentName, MaryDataType inputType, MaryDataType outputType, String allophonesProperty, String userdictProperty, String lexiconProperty, String ltsProperty) throws IOException, MaryConfigurationException {
        this(componentName, inputType, outputType, allophonesProperty, userdictProperty, lexiconProperty, ltsProperty, null);
    }

    /**
     * Creates a phonemiser and loads all of its resources.
     *
     * <p>The module's locale is taken from the allophone set. The user
     * dictionary is optional: it is only read when its property resolves to a
     * file name and that file exists. The lexicon and letter-to-sound streams
     * are mandatory.
     *
     * @param componentName name of this module
     * @param inputType data type accepted as input
     * @param outputType data type produced as output
     * @param allophonesProperty property naming the allophone set
     * @param userdictProperty property naming the user dictionary file
     * @param lexiconProperty property naming the lexicon stream
     * @param ltsProperty property naming the letter-to-sound stream
     * @param removetrailingonefromphonesProperty boolean property passed on to
     *        the letter-to-sound component; may be null, in which case
     *        {@code true} is used
     * @throws IOException if reading one of the resources fails
     * @throws MaryConfigurationException if the configuration is invalid
     */
    public JPhonemiser(String componentName, MaryDataType inputType, MaryDataType outputType, String allophonesProperty, String userdictProperty, String lexiconProperty, String ltsProperty, String removetrailingonefromphonesProperty) throws IOException, MaryConfigurationException {
        // super(...) has to come first, hence the allophone set is requested twice.
        super(componentName, inputType, outputType, MaryRuntimeUtils.needAllophoneSet(allophonesProperty).getLocale());
        this.allophoneSet = MaryRuntimeUtils.needAllophoneSet(allophonesProperty);

        String userdictFilename = MaryProperties.getFilename(userdictProperty);
        if (userdictFilename != null) {
            if (new File(userdictFilename).exists()) {
                this.userdict = this.readLexicon(userdictFilename);
            } else {
                this.logger.info("User dictionary '" + userdictFilename + "' for locale '" + this.getLocale() + "' does not exist. Ignoring.");
            }
        }

        InputStream lexiconStream = MaryProperties.needStream(lexiconProperty);
        this.lexicon = new FSTLookup(lexiconStream, lexiconProperty);

        InputStream ltsStream = MaryProperties.needStream(ltsProperty);
        if (removetrailingonefromphonesProperty != null) {
            this.removeTrailingOneFromPhones = MaryProperties.getBoolean(removetrailingonefromphonesProperty, true);
        }
        this.lts = new TrainedLTS(this.allophoneSet, ltsStream, this.removeTrailingOneFromPhones);
    }

    /**
     * Starts the module and compiles the two part-of-speech patterns.
     *
     * @throws Exception if the superclass startup fails
     */
    @Override
    public void startup() throws Exception {
        super.startup();
        this.setPunctuationPosRegex();
        this.setUnpronounceablePosRegex();
    }

    /**
     * Transcribes every {@code t} element of the input document in place.
     *
     * <p>Tokens that already carry a {@code ph} attribute without a
     * {@code *} placeholder are left alone, as are tokens rejected by
     * {@link #maybePronounceable(String, String)}. The text to transcribe is
     * the {@code sounds_like} attribute if present, otherwise the token text.
     * It is split at spaces and hyphens; the parts are transcribed separately
     * and joined with {@code " - "}.
     *
     * @param d input data whose document is modified
     * @return new data of this module's output type wrapping the same document
     * @throws Exception declared by the overridden method
     */
    @Override
    public MaryData process(MaryData d) throws Exception {
        Document doc = d.getDocument();
        NodeIterator it = MaryDomUtils.createNodeIterator(doc, doc, "t");
        Element t;
        while ((t = (Element) it.nextNode()) != null) {
            // An existing transcription is only completed when it contains a '*' placeholder.
            if (t.hasAttribute("ph") && !t.getAttribute("ph").contains("*")) {
                continue;
            }
            String text = t.hasAttribute("sounds_like") ? t.getAttribute("sounds_like") : MaryDomUtils.tokenText(t);
            String pos = t.hasAttribute("pos") ? t.getAttribute("pos") : null;
            if (!this.maybePronounceable(text, pos)) {
                continue;
            }

            StringBuilder ph = new StringBuilder();
            String g2pMethod = null;
            StringTokenizer st = new StringTokenizer(text, " -");
            while (st.hasMoreTokens()) {
                String graph = st.nextToken();
                StringBuilder helper = new StringBuilder();
                String phon = this.phonemise(graph, pos, helper);
                if (phon == null) {
                    continue;
                }
                if (ph.length() == 0) {
                    // Only the first transcribed part determines the reported method.
                    g2pMethod = helper.toString();
                    ph.append(phon);
                } else {
                    ph.append(" - ");
                    // NOTE(review): presumably turns primary stress marks into
                    // secondary ones for non-initial parts -- confirm against
                    // the allophone notation.
                    ph.append(phon.replace('\'', ','));
                }
            }

            if (ph.length() > 0) {
                this.setPh(t, ph.toString());
                t.setAttribute("g2p_method", g2pMethod);
            }
        }
        MaryData result = new MaryData(this.outputType(), d.getLocale());
        result.setDocument(doc);
        return result;
    }

    /**
     * Transcribes a single word.
     *
     * <p>Tries the user dictionary and the lexicon with the given spelling,
     * then with the Unicode-normalised spelling if that differs, and finally
     * the letter-to-sound rules.
     *
     * @param text word to transcribe
     * @param pos part of speech of the word; may be null
     * @param g2pMethod receives {@code "userdict"}, {@code "lexicon"} or
     *        {@code "rules"}, depending on where the transcription came from;
     *        left untouched if there is none
     * @return the transcription, or null if none could be produced
     */
    public String phonemise(String text, String pos, StringBuilder g2pMethod) {
        String result = this.dictionaryLookup(text, pos, g2pMethod);
        if (result != null) {
            return result;
        }

        String normalised = MaryUtils.normaliseUnicodeLetters(text, this.getLocale());
        if (!normalised.equals(text)) {
            result = this.dictionaryLookup(normalised, pos, g2pMethod);
            if (result != null) {
                return result;
            }
        }

        // Letter-to-sound rules work on the original spelling, not the normalised one.
        String phones = this.lts.predictPronunciation(text);
        try {
            result = this.lts.syllabify(phones);
        }
        catch (IllegalArgumentException e) {
            this.logger.error(String.format("Problem with token <%s> [%s]: %s", text, phones, e.getMessage()));
        }
        if (result != null) {
            g2pMethod.append("rules");
            return result;
        }
        return null;
    }

    /**
     * Looks a word up in the user dictionary and then in the lexicon,
     * recording which of the two produced the transcription.
     */
    private String dictionaryLookup(String text, String pos, StringBuilder g2pMethod) {
        String result = this.userdictLookup(text, pos);
        if (result != null) {
            g2pMethod.append("userdict");
            return result;
        }
        result = this.lexiconLookup(text, pos);
        if (result != null) {
            g2pMethod.append("lexicon");
        }
        return result;
    }

    /**
     * Looks a word up in the lexicon, trying the spelling as given, then in
     * lower case, then in lower case with an upper-case first character.
     *
     * @param text word to look up
     * @param pos part of speech; may be null
     * @return the first matching lexicon entry, or null if {@code text} is
     *         null or empty or nothing matches
     */
    public String lexiconLookup(String text, String pos) {
        if (text == null || text.length() == 0) {
            return null;
        }
        String[] entries = this.lexiconLookupPrimitive(text, pos);
        if (entries.length == 0) {
            text = text.toLowerCase(this.getLocale());
            entries = this.lexiconLookupPrimitive(text, pos);
        }
        if (entries.length == 0) {
            text = text.substring(0, 1).toUpperCase(this.getLocale()) + text.substring(1);
            entries = this.lexiconLookupPrimitive(text, pos);
        }
        return entries.length == 0 ? null : entries[0];
    }

    /**
     * Queries the lexicon for {@code text + pos} when a POS is given, falling
     * back to {@code text} alone when that yields nothing.
     */
    private String[] lexiconLookupPrimitive(String text, String pos) {
        if (pos != null) {
            String[] withPos = this.lexicon.lookup(text + pos);
            if (withPos.length != 0) {
                return withPos;
            }
        }
        return this.lexicon.lookup(text);
    }

    /**
     * Looks a word up in the user dictionary, trying the spelling as given,
     * then in lower case, then in lower case with an upper-case first
     * character.
     *
     * <p>Entries have the form {@code "phones"} or {@code "phones|pos"}, where
     * the POS field may hold several whitespace-separated tags. The first
     * entry that lists {@code pos} wins; if no entry does, the transcription
     * of the last entry is returned.
     *
     * @param text word to look up
     * @param pos part of speech; may be null
     * @return the transcription, or null if there is no user dictionary,
     *         {@code text} is null or empty, or the word is not listed
     */
    public String userdictLookup(String text, String pos) {
        if (this.userdict == null || text == null || text.length() == 0) {
            return null;
        }
        List<String> entries = this.userdict.get(text);
        if (entries == null) {
            text = text.toLowerCase(this.getLocale());
            entries = this.userdict.get(text);
        }
        if (entries == null) {
            text = text.substring(0, 1).toUpperCase(this.getLocale()) + text.substring(1);
            entries = this.userdict.get(text);
        }
        if (entries == null) {
            return null;
        }

        String transcr = null;
        for (String entry : entries) {
            String[] parts = entry.split("\\|");
            transcr = parts[0];
            if (parts.length <= 1 || pos == null) {
                continue;
            }
            // Only the POS field is searched. Tokenising the whole entry would
            // glue the first tag to the transcription ("phones|NN"), so it
            // could never match.
            StringTokenizer tokenizer = new StringTokenizer(parts[1]);
            while (tokenizer.hasMoreTokens()) {
                if (pos.equals(tokenizer.nextToken())) {
                    return transcr;
                }
            }
        }
        // No entry lists the requested POS: fall back to the last entry.
        return transcr;
    }

    /**
     * @return the allophone set this phonemiser was configured with
     */
    public AllophoneSet getAllophoneSet() {
        return this.allophoneSet;
    }

    /**
     * Reads a UTF-8 user dictionary with lines of the form
     * {@code graphemes | phones [| pos]}.
     *
     * <p>Blank lines and lines starting with {@code #} are skipped. Lines
     * without a grapheme or transcription field, and lines whose transcription
     * the allophone set rejects, are logged as warnings and skipped. Fields
     * after the third one are ignored.
     *
     * @param lexiconFilename path of the dictionary file
     * @return map from grapheme form to its {@code "phones"} or
     *         {@code "phones|pos"} entries, in file order
     * @throws IOException if the file cannot be opened or read
     */
    protected Map<String, List<String>> readLexicon(String lexiconFilename) throws IOException {
        this.logger.debug(String.format("Reading lexicon from '%s'", lexiconFilename));
        Map<String, List<String>> fLexicon = new HashMap<String, List<String>>();
        BufferedReader lexiconFile = new BufferedReader(new InputStreamReader(new FileInputStream(lexiconFilename), "UTF-8"));
        try {
            String line;
            while ((line = lexiconFile.readLine()) != null) {
                if (line.trim().equals("") || line.startsWith("#")) {
                    continue;
                }
                String[] lineParts = line.split("\\s*\\|\\s*");
                if (lineParts.length == 0) {
                    // split() drops trailing empty fields, so a line made up
                    // of separators only yields an empty array.
                    this.logger.warn(String.format("Lexicon '%s': ignoring line without entry: '%s'", lexiconFilename, line));
                    continue;
                }
                String graphStr = lineParts[0];
                if (lineParts.length < 2) {
                    this.logger.warn(String.format("Lexicon '%s': missing transcription for '%s'", lexiconFilename, graphStr));
                    continue;
                }
                String phonStr = lineParts[1];
                try {
                    // Called only for validation; the split result is not needed.
                    this.allophoneSet.splitIntoAllophones(phonStr);
                }
                catch (IllegalArgumentException e) {
                    this.logger.warn(String.format("Lexicon '%s': invalid entry for '%s': %s", lexiconFilename, graphStr, e.getMessage()));
                    continue;
                }
                String phonPosStr = phonStr;
                if (lineParts.length > 2) {
                    String pos = lineParts[2];
                    if (!pos.trim().equals("")) {
                        phonPosStr = phonPosStr + "|" + pos;
                    }
                }
                List<String> transcriptions = fLexicon.get(graphStr);
                if (transcriptions == null) {
                    transcriptions = new ArrayList<String>();
                    fLexicon.put(graphStr, transcriptions);
                }
                transcriptions.add(phonPosStr);
            }
        }
        finally {
            // Release the file handle even when reading fails half-way.
            lexiconFile.close();
        }
        return fLexicon;
    }

    /**
     * Stores a transcription in the {@code ph} attribute of a token element.
     * If the element already has a {@code ph} attribute, the first {@code *}
     * in it is replaced by {@code ph}; otherwise the attribute is created.
     *
     * @param t the {@code t} element to update
     * @param ph the transcription, inserted literally
     * @throws DOMException with code {@code INVALID_ACCESS_ERR} if {@code t}
     *         is not a {@code t} element
     */
    protected void setPh(Element t, String ph) {
        if (!t.getTagName().equals("t")) {
            throw new DOMException(DOMException.INVALID_ACCESS_ERR, "Only t elements allowed, received " + t.getTagName() + ".");
        }
        if (t.hasAttribute("ph")) {
            String prevPh = t.getAttribute("ph");
            // Spliced by hand rather than with replaceFirst(), which would
            // interpret '\' and '$' inside the transcription.
            int star = prevPh.indexOf('*');
            String newPh = star < 0 ? prevPh : prevPh.substring(0, star) + ph + prevPh.substring(star + 1);
            t.setAttribute("ph", newPh);
        } else {
            t.setAttribute("ph", ph);
        }
    }

    /**
     * Compiles the punctuation POS pattern from the property
     * {@code <language>.pos.punct.regex}, using {@code \$PUNCT} when the
     * property is unset or not a valid regular expression.
     */
    protected void setPunctuationPosRegex() {
        String propertyName = this.getLocale().getLanguage() + ".pos.punct.regex";
        this.punctuationPosRegex = this.compilePosRegex(propertyName, "\\$PUNCT");
        this.logger.debug(String.format("Punctuation regex pattern set to /%s/", this.punctuationPosRegex));
    }

    /**
     * Compiles the unpronounceable POS pattern from the property
     * {@code <language>.pos.unprounounceable.regex}, using
     * {@code ^[^a-zA-Z]+$} when the property is unset or not a valid regular
     * expression.
     */
    protected void setUnpronounceablePosRegex() {
        // The misspelt key is kept on purpose: existing configurations may rely on it.
        String propertyName = this.getLocale().getLanguage() + ".pos.unprounounceable.regex";
        this.unpronounceablePosRegex = this.compilePosRegex(propertyName, "^[^a-zA-Z]+$");
        this.logger.debug(String.format("Unpronounceable POS regex pattern set to /%s/", this.unpronounceablePosRegex));
    }

    /**
     * Reads a regular expression from the given property and compiles it,
     * falling back to {@code defaultRegex} when the property is unset or its
     * value does not compile.
     */
    private Pattern compilePosRegex(String propertyName, String defaultRegex) {
        String regex = MaryProperties.getProperty(propertyName);
        if (regex == null) {
            this.logger.debug(String.format("Property %s not set, using default", propertyName));
            regex = defaultRegex;
        } else {
            this.logger.debug(String.format("Using property %s", propertyName));
        }
        try {
            return Pattern.compile(regex);
        }
        catch (PatternSyntaxException e) {
            this.logger.error(String.format("Could not compile regex pattern /%s/, using default instead", regex));
            return Pattern.compile(defaultRegex);
        }
    }

    /**
     * @param pos part of speech; may be null
     * @return true if {@code pos} is non-null and entirely matches the
     *         punctuation POS pattern
     */
    public boolean isPosPunctuation(String pos) {
        return pos != null && this.punctuationPosRegex.matcher(pos).matches();
    }

    /**
     * @param pos part of speech; may be null
     * @return true if {@code pos} is non-null and entirely matches the
     *         unpronounceable POS pattern
     */
    public boolean isUnpronounceable(String pos) {
        return pos != null && this.unpronounceablePosRegex.matcher(pos).matches();
    }

    /**
     * Decides whether a token is worth transcribing.
     *
     * <p>Null or empty text is never pronounceable. Text matching
     * {@code .*\w.*} always is. Any other text is considered pronounceable
     * unless its POS marks it as punctuation or as unpronounceable; with a
     * null POS it is therefore accepted.
     *
     * @param text token text
     * @param pos part of speech; may be null
     * @return true if a transcription should be attempted
     */
    public boolean maybePronounceable(String text, String pos) {
        if (text == null || text.isEmpty()) {
            return false;
        }
        if (CONTAINS_WORD_CHAR.matcher(text).matches()) {
            return true;
        }
        if (this.isPosPunctuation(pos)) {
            return false;
        }
        return !this.isUnpronounceable(pos);
    }
}

