/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.cluster.tui;

import cc.mallet.cluster.Clustering;
import cc.mallet.cluster.Clusterings;
import cc.mallet.cluster.util.ClusterUtils;
import cc.mallet.pipe.Noop;
import cc.mallet.pipe.Pipe;
import cc.mallet.types.Alphabet;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.LabelAlphabet;
import cc.mallet.util.CommandOption;
import cc.mallet.util.MalletLogger;
import cc.mallet.util.Randoms;
import gnu.trove.TIntHashSet;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.logging.Logger;

/**
 * Command-line tool that reads a serialized {@link Clusterings} object and
 * optionally transforms it:
 * <ul>
 *   <li>{@code --min-cluster-size N} (N &gt; 1): drops every instance whose cluster
 *       has fewer than N members and, if an output prefix is set, writes the pruned
 *       clusterings to that file;</li>
 *   <li>{@code --training-proportion P} (P &gt; 0): splits the single clustering by
 *       whole clusters into a training part and a testing part and, if an output
 *       prefix is set, writes them to {@code <prefix>.train} and {@code <prefix>.test}.</li>
 * </ul>
 * If neither option is active, the input is only read and its size logged.
 */
public class Clusterings2Clusterings {
    private static final Logger logger = MalletLogger.getLogger(Clusterings2Clusterings.class.getName());
    static CommandOption.String inputFile = new CommandOption.String(Clusterings2Clusterings.class, "input", "FILENAME", true, "text.clusterings", "The filename from which to read the list of instances.", null);
    static CommandOption.String outputPrefixFile = new CommandOption.String(Clusterings2Clusterings.class, "output-prefix", "FILENAME", false, "text.clusterings", "The filename prefix to write output. Suffices 'train' and 'test' appended.", null);
    static CommandOption.Integer minClusterSize = new CommandOption.Integer(Clusterings2Clusterings.class, "min-cluster-size", "INTEGER", false, 1, "Remove clusters with fewer than this many Instances.", null);
    static CommandOption.Double trainingProportion = new CommandOption.Double(Clusterings2Clusterings.class, "training-proportion", "DOUBLE", false, 0.0, "Split into training and testing, with this percentage of instances reserved for training.", null);

    /**
     * Entry point: parses the command-line options, loads the input clusterings and
     * applies the requested transformations.
     *
     * @param args command-line arguments, processed by {@link CommandOption}
     * @throws IllegalStateException if the input file cannot be read or deserialized
     * @throws IllegalArgumentException if a train/test split is requested and the
     *         input does not contain exactly one clustering
     */
    public static void main(String[] args) {
        CommandOption.setSummary(Clusterings2Clusterings.class, "A tool to manipulate Clusterings.");
        CommandOption.process(Clusterings2Clusterings.class, args);

        Clusterings clusterings;
        try {
            clusterings = readClusterings(inputFile.value);
        }
        catch (Exception e) {
            // Nothing below can work without the input, so fail with the real cause
            // instead of continuing into a NullPointerException.
            throw new IllegalStateException("Exception reading clusterings from " + inputFile.value, e);
        }
        logger.info("number clusterings=" + clusterings.size());

        if (minClusterSize.value > 1) {
            for (int i = 0; i < clusterings.size(); ++i) {
                clusterings.set(i, removeSmallClusters(clusterings.get(i), minClusterSize.value));
            }
            if (outputPrefixFile.value != null) {
                try {
                    writeClusterings(clusterings, new File(outputPrefixFile.value));
                }
                catch (Exception e) {
                    // Best effort: report the failure and carry on with any remaining work.
                    logger.warning("Exception writing clustering to file " + outputPrefixFile.value + " " + e);
                    e.printStackTrace();
                }
            }
        }

        if (trainingProportion.value > 0.0) {
            splitTrainTest(clusterings, trainingProportion.value, outputPrefixFile.value);
        }
    }

    /**
     * Deserializes a {@link Clusterings} object from the given file, always releasing
     * the underlying file handle.
     *
     * <p>NOTE: this uses native Java deserialization and must only be pointed at
     * trusted files.
     *
     * @param filename path of the serialized clusterings
     * @return the deserialized clusterings
     * @throws IOException if the file cannot be opened or read
     * @throws ClassNotFoundException if a serialized class is not on the classpath
     */
    private static Clusterings readClusterings(String filename) throws IOException, ClassNotFoundException {
        FileInputStream fileIn = new FileInputStream(filename);
        try {
            ObjectInputStream ois = new ObjectInputStream(fileIn);
            return (Clusterings)ois.readObject();
        }
        finally {
            fileIn.close();
        }
    }

    /**
     * Serializes the given clusterings to a file, closing the file even if writing fails.
     *
     * @param clusterings object to serialize
     * @param file destination file
     * @throws IOException if the file cannot be opened or written
     */
    private static void writeClusterings(Clusterings clusterings, File file) throws IOException {
        FileOutputStream fileOut = new FileOutputStream(file);
        try {
            ObjectOutputStream oos = new ObjectOutputStream(fileOut);
            oos.writeObject(clusterings);
            // Flushes buffered data and closes fileOut; the close in finally is then a no-op.
            oos.close();
        }
        finally {
            fileOut.close();
        }
    }

    /**
     * Builds a new clustering containing only the instances whose cluster has at
     * least {@code minSize} members. Each kept instance is re-created with its
     * cluster id as target label, using the original alphabets (or fresh ones
     * when the original instance list has none).
     *
     * @param clustering the clustering to prune
     * @param minSize minimum number of members a cluster needs to be kept
     * @return the clustering rebuilt from the surviving instances
     */
    private static Clustering removeSmallClusters(Clustering clustering, int minSize) {
        InstanceList oldInstances = clustering.getInstances();
        Alphabet alph = oldInstances.getDataAlphabet();
        LabelAlphabet lalph = (LabelAlphabet)oldInstances.getTargetAlphabet();
        if (alph == null) {
            alph = new Alphabet();
        }
        if (lalph == null) {
            lalph = new LabelAlphabet();
        }
        Pipe noop = new Noop(alph, lalph);
        InstanceList newInstances = new InstanceList(noop);
        for (int j = 0; j < oldInstances.size(); ++j) {
            int label = clustering.getLabel(j);
            if (clustering.size(label) < minSize) {
                continue;
            }
            Instance instance = (Instance)oldInstances.get(j);
            newInstances.add(noop.pipe(new Instance(instance.getData(), lalph.lookupLabel(Integer.valueOf(label)), instance.getName(), instance.getSource())));
        }
        return createSmallerClustering(newInstances);
    }

    /**
     * Splits the single clustering in {@code clusterings} into training and testing
     * clusterings. Whole clusters are drawn at random (fixed seed, so the split is
     * reproducible) into the training set until it holds at least
     * {@code proportion * numInstances} instances or no clusters remain; all other
     * clusters form the testing set. Both parts are written to
     * {@code <outputPrefix>.train} / {@code <outputPrefix>.test} when
     * {@code outputPrefix} is non-null.
     *
     * @param clusterings container expected to hold exactly one clustering
     * @param proportion fraction of instances targeted for the training set
     * @param outputPrefix filename prefix for the output files, or null to skip writing
     * @throws IllegalArgumentException if {@code clusterings} does not hold exactly one clustering
     */
    private static void splitTrainTest(Clusterings clusterings, double proportion, String outputPrefix) {
        if (clusterings.size() != 1) {
            throw new IllegalArgumentException("Expect one clustering to do train/test split, not " + clusterings.size());
        }
        Clustering clustering = clusterings.get(0);
        int numClusters = clustering.getNumClusters();
        int targetTrainSize = (int)(proportion * (double)clustering.getNumInstances());
        TIntHashSet clustersSampled = new TIntHashSet();
        Randoms random = new Randoms(123);
        LabelAlphabet lalph = new LabelAlphabet();

        InstanceList trainingInstances = new InstanceList(new Noop(null, lalph));
        // The second condition guarantees termination when the target cannot be met
        // (e.g. a proportion above 1.0): once every cluster is sampled there is
        // nothing left to draw.
        while (trainingInstances.size() < targetTrainSize && clustersSampled.size() < numClusters) {
            int cluster = random.nextInt(numClusters);
            if (clustersSampled.contains(cluster)) {
                continue;
            }
            clustersSampled.add(cluster);
            addClusterInstances(clustering, cluster, lalph, trainingInstances);
        }
        trainingInstances.shuffle(random);
        Clustering trainingClustering = createSmallerClustering(trainingInstances);

        InstanceList testingInstances = new InstanceList(null, lalph);
        for (int cluster = 0; cluster < numClusters; ++cluster) {
            if (clustersSampled.contains(cluster)) {
                continue;
            }
            addClusterInstances(clustering, cluster, lalph, testingInstances);
        }
        testingInstances.shuffle(random);
        Clustering testingClustering = createSmallerClustering(testingInstances);

        logger.info(outputPrefix + ".train : " + trainingClustering.getNumClusters() + " objects");
        logger.info(outputPrefix + ".test : " + testingClustering.getNumClusters() + " objects");
        if (outputPrefix != null) {
            try {
                writeClusterings(new Clusterings(new Clustering[]{trainingClustering}), new File(outputPrefix + ".train"));
                writeClusterings(new Clusterings(new Clustering[]{testingClustering}), new File(outputPrefix + ".test"));
            }
            catch (Exception e) {
                // Best effort: report the failure rather than aborting.
                logger.warning("Exception writing clustering to file " + outputPrefix + " " + e);
                e.printStackTrace();
            }
        }
    }

    /**
     * Appends a copy of every instance of one cluster to {@code destination},
     * labelling each copy with the cluster id.
     *
     * @param clustering source clustering
     * @param cluster index of the cluster to copy
     * @param lalph label alphabet used to look up the cluster-id label
     * @param destination list receiving the relabelled copies
     */
    private static void addClusterInstances(Clustering clustering, int cluster, LabelAlphabet lalph, InstanceList destination) {
        InstanceList members = clustering.getCluster(cluster);
        for (int i = 0; i < members.size(); ++i) {
            Instance inst = (Instance)members.get(i);
            destination.add(new Instance(inst.getData(), lalph.lookupLabel(Integer.valueOf(cluster)), inst.getName(), inst.getSource()));
        }
    }

    /**
     * Builds a clustering from labelled instances by starting from a singleton
     * clustering and then calling {@link ClusterUtils#mergeInstancesWithSameLabel}
     * on it.
     *
     * @param instances labelled instances to cluster
     * @return the resulting clustering
     */
    private static Clustering createSmallerClustering(InstanceList instances) {
        Clustering c = ClusterUtils.createSingletonClustering(instances);
        return ClusterUtils.mergeInstancesWithSameLabel(c);
    }
}

