package org.broadinstitute.gatk.tools.walkers.validation.validationsiteselector;

import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.engine.GATKVCFUtils;
import org.broadinstitute.gatk.engine.SampleUtils;
import org.broadinstitute.gatk.engine.walkers.RodWalker;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.commandline.Argument;
import org.broadinstitute.gatk.utils.commandline.Hidden;
import org.broadinstitute.gatk.utils.commandline.Input;
import org.broadinstitute.gatk.utils.commandline.Output;
import org.broadinstitute.gatk.utils.commandline.RodBinding;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
import org.broadinstitute.gatk.utils.help.HelpConstants;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;

/**
 * Walker that gathers candidate variants from one or more input VCF bindings and, once the
 * traversal is finished, writes a requested number of them to the output VCF as validation sites.
 *
 * <p>During traversal every variant that passes the type, polymorphism and filter checks is handed
 * to a {@link FrequencyModeSelector}; the final choice of {@code numValidationSites} records is
 * delegated to that selector in {@link #onTraversalDone(Integer)}.
 */
@DocumentedGATKFeature(groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class})
public class ValidationSiteSelector extends RodWalker<Integer, Integer> {

    /** Input VCF bindings the candidate variants are read from. */
    @Input(fullName = "variant", shortName = "V", doc = "Input VCF file, can be specified multiple times", required = true)
    public List<RodBinding<VariantContext>> variants;

    /** Expressions matched against the sample names found in the input VCF headers. */
    @Argument(fullName = "sample_expressions", shortName = "se", doc = "Regular expression to select many samples from the ROD tracks provided. Can be specified multiple times", required = false)
    public Set<String> sampleExpressions;

    /** Files listing samples to include. */
    @Input(fullName = "sample_file", shortName = "sf", doc = "File containing a list of samples (one per line) to include. Can be specified multiple times", required = false)
    public Set<File> sampleFiles;

    /** Number of sites requested from the frequency selector at the end of the traversal. */
    @Argument(fullName = "numValidationSites", shortName = "numSites", doc = "Number of output validation sites", required = true)
    private int numValidationSites;

    /** Destination for the header and the selected validation sites. */
    @Output(doc = "File to which variants should be written")
    protected VariantContextWriter vcfWriter = null;

    /** Explicitly requested sample names. */
    @Argument(fullName = "sample_name", shortName = "sn", doc = "Include genotypes from this sample. Can be specified multiple times", required = false)
    public Set<String> sampleNames = new HashSet<>(0);

    /** Decides which {@link SampleSelector} implementation is created in {@link #initialize()}. */
    @Argument(fullName = "sampleMode", shortName = "sampleMode", doc = "Sample selection mode", required = false)
    private SAMPLE_SELECTION_MODE sampleMode = SAMPLE_SELECTION_MODE.NONE;

    /** Only read when {@link SAMPLE_SELECTION_MODE#POLY_BASED_ON_GL} is in effect. */
    @Argument(shortName = "samplePNonref", fullName = "samplePNonref", doc = "GL-based selection mode only: the probability that a site is non-reference in the samples for which to include the site", required = false)
    private double samplePNonref = 0.99d;

    /** When false, variants that had filters applied and failed them are skipped in {@code map}. */
    @Argument(fullName = "includeFilteredSites", shortName = "ifs", doc = "If true, will include filtered sites in set to choose variants from", required = false)
    private boolean INCLUDE_FILTERED_SITES = false;

    /** Forwarded unchanged to the frequency selector for every logged site. */
    @Argument(fullName = "ignoreGenotypes", shortName = "ignoreGenotypes", doc = "If true, will ignore genotypes in VCF, will take AC,AF from annotations and will make no sample selection", required = false)
    private boolean IGNORE_GENOTYPES = false;

    /** When true, sites that are not polymorphic in the samples are still considered. */
    @Argument(fullName = "ignorePolymorphicStatus", shortName = "ignorePolymorphicStatus", doc = "If true, will ignore polymorphic status in VCF, and will take VCF record directly without pre-selection", required = false)
    private boolean IGNORE_POLYMORPHIC = false;

    /** Bin count handed to {@link KeepAFSpectrumFrequencySelector}; unused in UNIFORM mode. */
    @Hidden
    @Argument(fullName = "numFrequencyBins", shortName = "numBins", doc = "Number of frequency bins if we're to match AF distribution", required = false)
    private int numFrequencyBins = 20;

    /** Decides which {@link FrequencyModeSelector} implementation is created in {@link #initialize()}. */
    @Argument(fullName = "frequencySelectionMode", shortName = "freqMode", doc = "Allele Frequency selection mode", required = false)
    private AF_COMPUTATION_MODE freqMode = AF_COMPUTATION_MODE.KEEP_AF_SPECTRUM;

    /** User-requested variant types; an empty list means "all types". */
    @Argument(fullName = "selectTypeToInclude", shortName = "selectType", doc = "Select only a certain type of variants from the input file. Valid types are INDEL, SNP, MIXED, MNP, SYMBOLIC, NO_VARIATION. Can be specified multiple times", required = false)
    private List<VariantContext.Type> TYPES_TO_INCLUDE = new ArrayList<>();

    /** Effective, sorted set of samples used for sample-based site selection. */
    private final TreeSet<String> samples = new TreeSet<>();

    /** Sample-based site filter, built in {@link #initialize()}. */
    SampleSelector sampleSelector = null;

    /** Accumulates candidate sites and performs the final selection, built in {@link #initialize()}. */
    FrequencyModeSelector frequencyModeSelector = null;

    /** Effective list of variant types accepted by {@code map}. */
    private final List<VariantContext.Type> selectedTypes = new ArrayList<>();

    /** Selects the {@link FrequencyModeSelector} implementation used to pick the output sites. */
    public enum AF_COMPUTATION_MODE {
        KEEP_AF_SPECTRUM,
        UNIFORM
    }

    /** Selects the {@link SampleSelector} implementation used to test a site against the samples. */
    public enum SAMPLE_SELECTION_MODE {
        NONE,
        POLY_BASED_ON_GT,
        POLY_BASED_ON_GL
    }

    /**
     * Resolves the effective sample set and variant types, creates the two selector objects and
     * writes the output VCF header.
     *
     * <p>The sample set is the union of the samples read from {@code sampleFiles}, the header
     * samples matching {@code sampleExpressions}, and {@code sampleNames}. If that union is empty,
     * every sample found in the input VCF headers is used instead.
     */
    @Override
    public void initialize() {
        final TreeSet<String> headerSamples = new TreeSet<>(SampleUtils.getSampleList(
                GATKVCFUtils.getVCFHeadersFromRods(getToolkit()),
                GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));

        final Collection<String> fromFiles = SampleUtils.getSamplesFromFiles(this.sampleFiles);
        final Collection<String> fromExpressions = SampleUtils.matchSamplesExpressions(headerSamples, this.sampleExpressions);

        // Requested samples first ...
        this.samples.addAll(fromFiles);
        this.samples.addAll(fromExpressions);
        this.samples.addAll(this.sampleNames);
        // ... and if nothing was requested, fall back to everything present in the headers.
        if (this.samples.isEmpty()) {
            this.samples.addAll(headerSamples);
        }

        this.sampleSelector = getSampleSelectorObject(this.sampleMode, this.samples);
        this.frequencyModeSelector = getFrequencyModeSelectorObject(this.freqMode, getToolkit().getGenomeLocParser());

        // No explicit type restriction means every VariantContext type is eligible.
        if (this.TYPES_TO_INCLUDE.isEmpty()) {
            for (final VariantContext.Type type : VariantContext.Type.values()) {
                this.selectedTypes.add(type);
            }
        } else {
            this.selectedTypes.addAll(this.TYPES_TO_INCLUDE);
        }

        // The output header carries only a "source" line identifying this tool.
        final Set<VCFHeaderLine> headerLines = new HashSet<>();
        headerLines.add(new VCFHeaderLine(VCFHeader.SOURCE_KEY, "ValidationSiteSelector"));
        this.vcfWriter.writeHeader(new VCFHeader(headerLines));
    }

    /**
     * Logs every eligible variant at the current locus with the frequency selector.
     *
     * <p>A variant is eligible when its type is one of the selected types, it is polymorphic in
     * its samples (or polymorphic status is being ignored), and it is not a filtered record (or
     * filtered sites are explicitly included).
     *
     * @return 0 if the tracker is {@code null} or no variants are present at this locus,
     *         1 otherwise (even if none of the variants turned out to be eligible)
     */
    @Override
    public Integer map(RefMetaDataTracker refMetaDataTracker, ReferenceContext referenceContext, AlignmentContext alignmentContext) {
        if (refMetaDataTracker == null) {
            return 0;
        }
        final List<VariantContext> values = refMetaDataTracker.getValues(this.variants, alignmentContext.getLocation());
        if (values == null || values.isEmpty()) {
            return 0;
        }

        for (final VariantContext variantContext : values) {
            if (!this.selectedTypes.contains(variantContext.getType())) {
                continue;
            }
            if (!variantContext.isPolymorphicInSamples() && !this.IGNORE_POLYMORPHIC) {
                continue;
            }
            if (!this.INCLUDE_FILTERED_SITES && variantContext.filtersWereApplied() && variantContext.isFiltered()) {
                continue;
            }

            // With an empty sample set there is nothing to test against, so the site passes.
            final boolean passesSampleSelection =
                    this.samples.isEmpty() || this.sampleSelector.selectSiteInSamples(variantContext);
            this.frequencyModeSelector.logCurrentSiteData(
                    variantContext, passesSampleSelection, this.IGNORE_GENOTYPES, this.IGNORE_POLYMORPHIC);
        }
        return 1;
    }

    /** @return the initial reduce value, 0 */
    @Override
    public Integer reduceInit() {
        return 0;
    }

    /** @return the sum of the two partial counts */
    @Override
    public Integer reduce(Integer num, Integer num2) {
        return num + num2;
    }

    /**
     * Asks the frequency selector for {@code numValidationSites} sites, writes each of them to
     * the output VCF and logs the accumulated reduce value.
     *
     * @param num the reduced sum of all {@code map} results
     */
    @Override
    public void onTraversalDone(Integer num) {
        logger.info("Outputting validation sites...");
        for (final VariantContext site : this.frequencyModeSelector.selectValidationSites(this.numValidationSites)) {
            this.vcfWriter.add(site);
        }
        logger.info(num + " records processed.");
    }

    /**
     * Creates the {@link SampleSelector} matching the requested mode.
     *
     * @param mode      the sample selection mode
     * @param sampleSet the samples the selector should operate on
     * @return a new selector instance for {@code mode}
     * @throws IllegalArgumentException if {@code mode} is not one of the handled constants
     */
    private SampleSelector getSampleSelectorObject(SAMPLE_SELECTION_MODE mode, TreeSet<String> sampleSet) {
        switch (mode) {
            case POLY_BASED_ON_GL:
                // The GL-based selector receives log10(1 - samplePNonref) as its second argument.
                return new GLBasedSampleSelector(sampleSet, Math.log10(1.0d - this.samplePNonref));
            case POLY_BASED_ON_GT:
                return new GTBasedSampleSelector(sampleSet);
            case NONE:
                return new NullSampleSelector(sampleSet);
            default:
                throw new IllegalArgumentException("Unsupported Sample Selection Mode: " + mode);
        }
    }

    /**
     * Creates the {@link FrequencyModeSelector} matching the requested mode.
     *
     * @param mode            the allele-frequency selection mode
     * @param genomeLocParser parser handed to the selector's constructor
     * @return a new selector instance for {@code mode}
     * @throws IllegalArgumentException if {@code mode} is not one of the handled constants
     */
    private FrequencyModeSelector getFrequencyModeSelectorObject(AF_COMPUTATION_MODE mode, GenomeLocParser genomeLocParser) {
        switch (mode) {
            case KEEP_AF_SPECTRUM:
                return new KeepAFSpectrumFrequencySelector(this.numFrequencyBins, genomeLocParser);
            case UNIFORM:
                return new UniformSamplingFrequencySelector(genomeLocParser);
            default:
                throw new IllegalArgumentException("Unexpected Frequency Selection Mode: " + mode);
        }
    }
}
