package org.broadinstitute.gatk.tools.walkers.variantrecalibration;

import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFFilterHeaderLine;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFStandardHeaderLines;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.engine.GATKVCFUtils;
import org.broadinstitute.gatk.engine.SampleUtils;
import org.broadinstitute.gatk.engine.walkers.PartitionBy;
import org.broadinstitute.gatk.engine.walkers.PartitionType;
import org.broadinstitute.gatk.engine.walkers.RodWalker;
import org.broadinstitute.gatk.engine.walkers.TreeReducible;
import org.broadinstitute.gatk.tools.walkers.annotator.AnnotationUtils;
import org.broadinstitute.gatk.tools.walkers.genotyper.StandardCallerArgumentCollection;
import org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection;
import org.broadinstitute.gatk.utils.commandline.Advanced;
import org.broadinstitute.gatk.utils.commandline.Argument;
import org.broadinstitute.gatk.utils.commandline.Input;
import org.broadinstitute.gatk.utils.commandline.Output;
import org.broadinstitute.gatk.utils.commandline.RodBinding;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
import org.broadinstitute.gatk.utils.help.HelpConstants;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;

/**
 * Walker that applies the results of a recalibration run to a set of input variants.
 *
 * <p>For every input record that matches the selected mode and is not excluded by an input filter, the
 * matching record of the recal file is looked up, its VQSLOD / culprit / training-label annotations are
 * copied onto the output record, and a filter is chosen either from the truth-sensitivity tranches
 * ({@code --ts_filter_level}) or from a fixed VQSLOD cutoff ({@code --lodCutoff}). All other records are
 * written through unchanged.</p>
 */
@PartitionBy(PartitionType.LOCUS)
@DocumentedGATKFeature(groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class})
/* loaded from: input_file:org/broadinstitute/gatk/tools/walkers/variantrecalibration/ApplyRecalibration.class */
public class ApplyRecalibration extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
    /** Filter name applied when filtering by a fixed VQSLOD cutoff instead of by tranche. */
    public static final String LOW_VQSLOD_FILTER_NAME = "LOW_VQSLOD";

    @Input(fullName = "input", shortName = "input", doc = "The raw input variants to be recalibrated", required = true)
    public List<RodBinding<VariantContext>> input;

    @Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal file used by ApplyRecalibration", required = true)
    protected RodBinding<VariantContext> recal;

    @Input(fullName = "tranches_file", shortName = "tranchesFile", doc = "The input tranches file describing where to cut the data", required = false)
    protected File TRANCHES_FILE;
    private static final String listPrintSeparator = ",";
    /** Common prefix of every tranche filter name, e.g. {@code VQSRTrancheSNP99.00to99.90}. */
    private static final String trancheFilterString = "VQSRTranche";
    /** Strips list brackets and whitespace from a single element of an encoded annotation list. */
    private static final String arrayParseRegex = "[\\[\\]\\s]";
    private static final String emptyStringValue = "NA";
    private static final String emptyFloatValue = "NaN";
    /**
     * Extracts the two sensitivity limits from a tranche filter name. The model label between the prefix
     * and the first limit is matched with {@code \D+} so that it cannot consume the leading digits of the
     * lower limit.
     */
    private static final Pattern TRANCHE_FILTER_LIMITS_PATTERN = Pattern.compile("VQSRTranche\\D+(\\d+\\.\\d+)to(\\d+\\.\\d+)");
    /** Cutoff used when neither {@code --ts_filter_level} nor {@code --lodCutoff} is given. */
    private final double DEFAULT_VQSLOD_CUTOFF = StandardCallerArgumentCollection.DEFAULT_CONTAMINATION_FRACTION;
    /** Set when the input header already carries SNP tranche filters from an earlier run. */
    boolean foundSNPTranches = false;
    /** Set when the input header already carries INDEL tranche filters from an earlier run. */
    boolean foundINDELTranches = false;

    @Output(doc = "The output filtered and recalibrated VCF file in which each variant is annotated with its VQSLOD value")
    private VariantContextWriter vcfWriter = null;

    @Argument(fullName = "ts_filter_level", shortName = "ts_filter_level", doc = "The truth sensitivity level at which to start filtering", required = false)
    protected Double TS_FILTER_LEVEL = null;

    @Argument(fullName = "useAlleleSpecificAnnotations", shortName = SAMSequenceRecord.ASSEMBLY_TAG, doc = "If specified, the tool will attempt to apply a filter to each allele based on the input tranches and allele-specific .recal file.", required = false)
    public boolean useASannotations = false;

    @Advanced
    @Argument(fullName = "lodCutoff", shortName = "lodCutoff", doc = "The VQSLOD score below which to start filtering", required = false)
    protected Double VQSLOD_CUTOFF = null;

    @Argument(fullName = "ignore_filter", shortName = "ignoreFilter", doc = "If specified, the recalibration will be applied to variants marked as filtered by the specified filter name in the input VCF file", required = false)
    private String[] IGNORE_INPUT_FILTERS = null;

    @Argument(fullName = "ignore_all_filters", shortName = "ignoreAllFilters", doc = "If specified, the variant recalibrator will ignore all input filters. Useful to rerun the VQSR from a filtered output file.", required = false)
    private boolean IGNORE_ALL_FILTERS = false;

    @Argument(fullName = "excludeFiltered", shortName = "ef", doc = "Don't output filtered loci after applying the recalibration", required = false)
    protected boolean EXCLUDE_FILTERED = false;

    @Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ: 1.) SNP for recalibrating only SNPs (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both SNPs and indels simultaneously.", required = false)
    public VariantRecalibratorArgumentCollection.Mode MODE = VariantRecalibratorArgumentCollection.Mode.SNP;
    /** Tranches at or above the requested sensitivity level, in reversed file order. */
    private final List<Tranche> tranches = new ArrayList<Tranche>();
    private final Set<String> inputNames = new HashSet<String>();
    private final Set<String> ignoreInputFilterSet = new TreeSet<String>();

    /**
     * Reads the tranches (when filtering by truth sensitivity), assembles the output header from the input
     * headers plus the VQSR annotation and filter lines, and writes it.
     *
     * @throws UserException if {@code --ts_filter_level} is given without a tranches file, if it is combined
     *                       with {@code --lodCutoff}, or if no tranche reaches the requested level
     */
    @Override // org.broadinstitute.gatk.engine.walkers.Walker
    public void initialize() {
        if (this.TS_FILTER_LEVEL != null) {
            // The tranches file argument is optional, so reject the combination explicitly instead of
            // handing a null file to the reader.
            if (this.TRANCHES_FILE == null) {
                throw new UserException("Argument --ts_filter_level requires a tranches file to be supplied with --tranches_file.");
            }
            for (Tranche tranche : Tranche.readTranches(this.TRANCHES_FILE)) {
                if (tranche.ts >= this.TS_FILTER_LEVEL.doubleValue()) {
                    this.tranches.add(tranche);
                }
                // Plain concatenation: the text is not a format pattern and must not be parsed as one.
                logger.info("Read tranche " + tranche);
            }
            Collections.reverse(this.tranches);
        }

        for (RodBinding<VariantContext> binding : this.input) {
            this.inputNames.add(binding.getName());
        }
        if (this.IGNORE_INPUT_FILTERS != null) {
            this.ignoreInputFilterSet.addAll(Arrays.asList(this.IGNORE_INPUT_FILTERS));
        }

        final Set<VCFHeaderLine> inputHeaderLines = GATKVCFUtils.getHeaderFields(getToolkit(), this.inputNames);
        final Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
        headerLines.addAll(inputHeaderLines);
        addVQSRStandardHeaderLines(headerLines);
        if (this.useASannotations) {
            addAlleleSpecificVQSRHeaderLines(headerLines);
        }

        // Only the input's own header lines are inspected for traces of an earlier run.
        checkForPreviousApplyRecalRun(Collections.unmodifiableSet(inputHeaderLines));

        final Set<String> samples = new TreeSet<String>();
        samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), this.inputNames));

        if (this.TS_FILTER_LEVEL == null) {
            if (this.VQSLOD_CUTOFF == null) {
                this.VQSLOD_CUTOFF = Double.valueOf(this.DEFAULT_VQSLOD_CUTOFF);
            }
            headerLines.add(new VCFFilterHeaderLine(LOW_VQSLOD_FILTER_NAME, "VQSLOD < " + this.VQSLOD_CUTOFF));
            logger.info("Keeping all variants with VQSLOD >= " + this.VQSLOD_CUTOFF);
        } else {
            if (this.VQSLOD_CUTOFF != null) {
                throw new UserException("Arguments --ts_filter_level and --lodCutoff are mutually exclusive. Please only specify one option.");
            }
            if (this.tranches.isEmpty()) {
                throw new UserException("No tranches were found in the file or were above the truth sensitivity filter level " + this.TS_FILTER_LEVEL);
            }
            // One filter line per tranche except the last one, which corresponds to PASS.
            for (int i = 0; i < this.tranches.size() - 1; i++) {
                final Tranche current = this.tranches.get(i);
                final Tranche next = this.tranches.get(i + 1);
                headerLines.add(new VCFFilterHeaderLine(current.name, "Truth sensitivity tranche level for " + current.model.toString() + " model at VQS Lod: " + current.minVQSLod + " <= x < " + next.minVQSLod));
            }
            final Tranche first = this.tranches.get(0);
            headerLines.add(new VCFFilterHeaderLine(first.name + "+", "Truth sensitivity tranche level for " + first.model.toString() + " model at VQS Lod < " + first.minVQSLod));
            logger.info("Keeping all variants in tranche " + this.tranches.get(this.tranches.size() - 1));
        }

        this.vcfWriter.writeHeader(new VCFHeader(headerLines, samples));
    }

    /**
     * Checks that a string has the form {@code <number>to<number>} (a trailing {@code +} on the second
     * number is tolerated).
     *
     * @param str the part of a tranche filter name that follows the model label
     * @return {@code true} if both end points parse as doubles; {@code false} if the string does not split
     *         into exactly two parts around {@code "to"}
     * @throws UserException if there are two parts but one of them is not a number
     */
    private boolean trancheIntervalIsValid(String str) {
        final String[] endPoints = str.split("to");
        if (endPoints.length != 2) {
            return false;
        }
        try {
            Double.parseDouble(endPoints[0]);
            Double.parseDouble(endPoints[1].replace("+", ""));
        } catch (NumberFormatException e) {
            throw new UserException("Poorly formatted tranche filter name does not contain two sensitivity interval end points.");
        }
        return true;
    }

    /**
     * Adds the INFO header lines for the site-level annotations written by this tool (END, VQSLOD, culprit
     * and the positive/negative training labels).
     *
     * @param set header line set to add to
     */
    public static void addVQSRStandardHeaderLines(Set<VCFHeaderLine> set) {
        set.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
        set.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.VQS_LOD_KEY));
        set.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.CULPRIT_KEY));
        set.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.POSITIVE_LABEL_KEY));
        set.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.NEGATIVE_LABEL_KEY));
    }

    /**
     * Adds the INFO header lines for the allele-specific annotations (filter status, culprit, VQSLOD).
     *
     * @param set header line set to add to
     */
    public static void addAlleleSpecificVQSRHeaderLines(Set<VCFHeaderLine> set) {
        set.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_FILTER_STATUS_KEY));
        set.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_CULPRIT_KEY));
        set.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_VQS_LOD_KEY));
    }

    /**
     * Scans the input header for tranche filters and records in {@link #foundSNPTranches} /
     * {@link #foundINDELTranches} whether SNP or INDEL tranche filters with a valid sensitivity interval
     * are already present.
     *
     * @param set header lines of the input files
     * @throws UserException if a tranche filter name carries a non-numeric sensitivity end point
     */
    private void checkForPreviousApplyRecalRun(Set<VCFHeaderLine> set) {
        final int prefixLength = trancheFilterString.length();
        // Offsets at which the sensitivity interval starts, i.e. just past the model label.
        final int snpIntervalStart = prefixLength + "SNP".length();
        final int indelIntervalStart = prefixLength + "INDEL".length();

        for (VCFHeaderLine headerLine : set) {
            if (!(headerLine instanceof VCFFilterHeaderLine)) {
                continue;
            }
            final String filterName = ((VCFFilterHeaderLine) headerLine).getID();
            if (filterName.length() <= prefixLength || !filterName.substring(0, prefixLength).equalsIgnoreCase(trancheFilterString)) {
                continue;
            }
            final char modelInitial = filterName.charAt(prefixLength);
            // The length guards keep names that are too short to hold an interval from failing in substring.
            if (modelInitial == 'S') {
                if (filterName.length() >= snpIntervalStart && trancheIntervalIsValid(filterName.substring(snpIntervalStart))) {
                    this.foundSNPTranches = true;
                }
            } else if (modelInitial == 'I') {
                if (filterName.length() >= indelIntervalStart && trancheIntervalIsValid(filterName.substring(indelIntervalStart))) {
                    this.foundINDELTranches = true;
                }
            }
        }
    }

    /**
     * Processes all input variants at the current location: eligible records are annotated and filtered,
     * all others are written unchanged.
     *
     * @param refMetaDataTracker tracker giving access to the input and recal records; may be {@code null}
     * @param referenceContext   unused
     * @param alignmentContext   supplies the location at which records are fetched
     * @return always 1
     */
    @Override // org.broadinstitute.gatk.engine.walkers.LocusWalker
    public Integer map(RefMetaDataTracker refMetaDataTracker, ReferenceContext referenceContext, AlignmentContext alignmentContext) {
        if (refMetaDataTracker == null) {
            return 1;
        }
        final List<VariantContext> inputVCs = refMetaDataTracker.getValues(this.input, alignmentContext.getLocation());
        final List<VariantContext> recalVCs = refMetaDataTracker.getValues(this.recal, alignmentContext.getLocation());

        for (VariantContext vc : inputVCs) {
            final boolean matchesMode = this.useASannotations || VariantDataManager.checkVariationClass(vc, this.MODE);
            final boolean passesInputFilters = this.IGNORE_ALL_FILTERS
                    || vc.isNotFiltered()
                    || (!this.ignoreInputFilterSet.isEmpty() && this.ignoreInputFilterSet.containsAll(vc.getFilters()));
            if (!matchesMode || !passesInputFilters) {
                // Not subject to recalibration: emit untouched.
                this.vcfWriter.add(vc);
                continue;
            }

            final VariantContextBuilder builder = new VariantContextBuilder(vc);
            final String filterString = this.useASannotations
                    ? doAlleleSpecificFiltering(vc, recalVCs, builder)
                    : doSiteSpecificFiltering(vc, recalVCs, builder);

            if (filterString.equals(VCFConstants.PASSES_FILTERS_v4)) {
                builder.passFilters();
            } else if (filterString.equals(".")) {
                builder.unfiltered();
            } else {
                builder.filters(filterString);
            }

            final VariantContext outputVC = builder.make();
            if (!this.EXCLUDE_FILTERED || outputVC.isNotFiltered()) {
                this.vcfWriter.add(outputVC);
            }
        }
        return 1;
    }

    /**
     * Extracts the lower sensitivity limit from a tranche filter name such as
     * {@code VQSRTrancheSNP99.00to99.90}.
     *
     * @param str filter name to parse
     * @return the first of the two limits (99.00 in the example), or -1 if the name is not a tranche filter
     */
    public double parseFilterLowerLimit(String str) {
        final Matcher matcher = TRANCHE_FILTER_LIMITS_PATTERN.matcher(str);
        if (matcher.find()) {
            return Double.parseDouble(matcher.group(1));
        }
        return -1.0d;
    }

    /**
     * Chooses the site-level filter in allele-specific mode by combining the filter derived from the given
     * LOD with the per-allele filter statuses already stored on the record.
     *
     * @param variantContext record being filtered
     * @param d              LOD from which the new filter is derived
     * @return {@code "."} if this run is not responsible for the record's filter; otherwise PASS if either
     *         the new filter or any stored allele status is PASS; otherwise the tranche filter with the
     *         lowest lower sensitivity limit among the new filter and the stored statuses
     */
    protected String generateFilterStringFromAlleles(VariantContext variantContext, double d) {
        final boolean otherModeAlreadyApplied =
                (this.MODE == VariantRecalibratorArgumentCollection.Mode.SNP && this.foundINDELTranches)
                        || (this.MODE == VariantRecalibratorArgumentCollection.Mode.INDEL && this.foundSNPTranches);
        final boolean pureSiteOfThisMode = !variantContext.isMixed() && VariantDataManager.checkVariationClass(variantContext, this.MODE);
        if (!otherModeAlreadyApplied && !pureSiteOfThisMode) {
            return ".";
        }

        final String previousStatuses = variantContext.getAttributeAsString(GATKVCFConstants.AS_FILTER_STATUS_KEY, null);
        final String newFilter = generateFilterString(d);
        if (previousStatuses == null || previousStatuses.equals(".") || newFilter.equals(VCFConstants.PASSES_FILTERS_v4)) {
            return newFilter;
        }

        String chosenFilter = newFilter;
        double lowestLimit = parseFilterLowerLimit(newFilter);
        for (String rawStatus : previousStatuses.split(",")) {
            final String status = rawStatus.replaceAll(arrayParseRegex, "").trim();
            if (status.equals(VCFConstants.PASSES_FILTERS_v4)) {
                // A single passing allele makes the whole site pass.
                return status;
            }
            final double statusLimit = parseFilterLowerLimit(status);
            if (statusLimit != -1.0d && statusLimit < lowestLimit) {
                lowestLimit = statusLimit;
                chosenFilter = status;
            }
        }
        return chosenFilter;
    }

    /**
     * Maps a LOD score to a filter name.
     *
     * <p>With a fixed cutoff the result is {@link #LOW_VQSLOD_FILTER_NAME} or PASS. With tranches, the list
     * is walked from its last entry to its first and the first tranche whose {@code minVQSLod} the score
     * reaches decides: the last entry means PASS, any other yields that tranche's name. A score below
     * every tranche yields the first tranche's name with a {@code +} suffix.</p>
     *
     * @param d LOD score
     * @return the filter name, or PASS
     */
    protected String generateFilterString(double d) {
        if (this.TS_FILTER_LEVEL == null) {
            return d < this.VQSLOD_CUTOFF.doubleValue() ? LOW_VQSLOD_FILTER_NAME : VCFConstants.PASSES_FILTERS_v4;
        }
        final int lastIndex = this.tranches.size() - 1;
        for (int i = lastIndex; i >= 0; i--) {
            final Tranche tranche = this.tranches.get(i);
            if (d >= tranche.minVQSLod) {
                // Stop at the first tranche reached; continuing would never terminate or pick a worse one.
                return i == lastIndex ? VCFConstants.PASSES_FILTERS_v4 : tranche.name;
            }
        }
        return this.tranches.get(0).name + "+";
    }

    /**
     * Finds the recal record that corresponds to an input record.
     *
     * @param target   input record
     * @param recalVCs recal records at the same location
     * @param allele   alternate allele to match in allele-specific mode; ignored otherwise
     * @return the first recal record with the same end position (and, in allele-specific mode, whose first
     *         alternate allele equals {@code allele}), or {@code null} if there is none
     */
    private VariantContext getMatchingRecalVC(VariantContext target, List<VariantContext> recalVCs, Allele allele) {
        for (VariantContext recalVC : recalVCs) {
            if (target.getEnd() != recalVC.getEnd()) {
                continue;
            }
            if (!this.useASannotations || allele.equals(recalVC.getAlternateAllele(0))) {
                return recalVC;
            }
        }
        return null;
    }

    /**
     * Fills the per-allele output lists for an allele that is not recalibrated in this run.
     *
     * <p>If no tranche filters from an earlier run were found, placeholder values are appended. Otherwise
     * the allele's previous values are carried over, provided the previous culprit list has an entry at
     * this index; nothing is appended if it does not.</p>
     *
     * @param altIndex        index of the alternate allele
     * @param prevCulprits    previous per-allele culprits
     * @param prevLods        previous per-allele LODs (assumed to be as long as {@code prevCulprits})
     * @param prevFilters     previous per-allele filter statuses (assumed to be as long as {@code prevCulprits})
     * @param culpritStrings  output culprit list
     * @param lodStrings      output LOD list
     * @param filterStrings   output filter status list
     */
    private void updateAnnotationsWithoutRecalibrating(int altIndex, String[] prevCulprits, String[] prevLods, String[] prevFilters, List<String> culpritStrings, List<String> lodStrings, List<String> filterStrings) {
        if (!this.foundINDELTranches && !this.foundSNPTranches) {
            culpritStrings.add(emptyStringValue);
            lodStrings.add(emptyFloatValue);
            filterStrings.add(emptyStringValue);
        } else if (altIndex < prevCulprits.length) {
            culpritStrings.add(prevCulprits[altIndex].replaceAll(arrayParseRegex, "").trim());
            lodStrings.add(prevLods[altIndex].replaceAll(arrayParseRegex, "").trim());
            filterStrings.add(prevFilters[altIndex].replaceAll(arrayParseRegex, "").trim());
        }
    }

    /**
     * Reads a comma-separated INFO annotation from a record.
     *
     * @return the elements, or an empty array if the annotation is absent or empty
     */
    private String[] readPreviousAlleleAnnotation(VariantContext vc, String key) {
        final String value = vc.getAttributeAsString(key, "");
        return value.isEmpty() ? new String[0] : value.split(",");
    }

    /**
     * Annotates a record allele by allele and derives its site-level filter.
     *
     * <p>Alleles of the current mode get their LOD, filter and culprit from the matching recal record
     * (spanning deletions get placeholders); other alleles keep their previous values. The three
     * allele-specific annotations are written to the builder, and the site filter is derived from the
     * highest LOD seen.</p>
     *
     * @param vc       input record
     * @param recalVCs recal records at the same location
     * @param builder  builder for the output record; receives the annotations
     * @return the site-level filter string
     * @throws UserException if an allele of the current mode has no matching recal record
     */
    private String doAlleleSpecificFiltering(VariantContext vc, List<VariantContext> recalVCs, VariantContextBuilder builder) {
        double bestLod = -20000.0d;
        final List<String> culpritStrings = new ArrayList<String>();
        final List<String> lodStrings = new ArrayList<String>();
        final List<String> filterStrings = new ArrayList<String>();

        String[] prevCulprits = null;
        String[] prevLods = null;
        String[] prevFilters = null;
        if (this.foundINDELTranches || this.foundSNPTranches) {
            prevCulprits = readPreviousAlleleAnnotation(vc, GATKVCFConstants.AS_CULPRIT_KEY);
            prevLods = readPreviousAlleleAnnotation(vc, GATKVCFConstants.AS_VQS_LOD_KEY);
            prevFilters = readPreviousAlleleAnnotation(vc, GATKVCFConstants.AS_FILTER_STATUS_KEY);
        }

        for (int altIndex = 0; altIndex < vc.getNAlleles() - 1; altIndex++) {
            final Allele altAllele = vc.getAlternateAllele(altIndex);
            if (!VariantDataManager.checkVariationClass(vc, altAllele, this.MODE)) {
                updateAnnotationsWithoutRecalibrating(altIndex, prevCulprits, prevLods, prevFilters, culpritStrings, lodStrings, filterStrings);
                continue;
            }

            String lodString = emptyFloatValue;
            String filterString = emptyStringValue;
            String culpritString = emptyStringValue;
            if (!altAllele.equals(Allele.SPAN_DEL)) {
                final VariantContext recalVC = getMatchingRecalVC(vc, recalVCs, altAllele);
                if (recalVC == null) {
                    throw new UserException("Encountered input allele which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants with flag -AS. First seen at: " + vc);
                }
                final double lod = recalVC.getAttributeAsDouble(GATKVCFConstants.VQS_LOD_KEY, -20000.0d);
                if (lod > bestLod) {
                    bestLod = lod;
                }
                lodString = String.format("%.4f", Double.valueOf(lod));
                filterString = generateFilterString(lod);
                culpritString = recalVC.getAttributeAsString(GATKVCFConstants.CULPRIT_KEY, ".");
                if (recalVC.hasAttribute(GATKVCFConstants.POSITIVE_LABEL_KEY)) {
                    builder.attribute(GATKVCFConstants.POSITIVE_LABEL_KEY, true);
                }
                if (recalVC.hasAttribute(GATKVCFConstants.NEGATIVE_LABEL_KEY)) {
                    builder.attribute(GATKVCFConstants.NEGATIVE_LABEL_KEY, true);
                }
            }
            lodStrings.add(lodString);
            filterStrings.add(filterString);
            culpritStrings.add(culpritString);
        }

        if (!filterStrings.isEmpty()) {
            builder.attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, AnnotationUtils.encodeStringList(filterStrings));
        }
        if (!lodStrings.isEmpty()) {
            builder.attribute(GATKVCFConstants.AS_VQS_LOD_KEY, AnnotationUtils.encodeStringList(lodStrings));
        }
        if (!culpritStrings.isEmpty()) {
            builder.attribute(GATKVCFConstants.AS_CULPRIT_KEY, AnnotationUtils.encodeStringList(culpritStrings));
        }
        return generateFilterStringFromAlleles(vc, bestLod);
    }

    /**
     * Annotates a record with the LOD, culprit and training labels of its matching recal record and derives
     * its filter from that LOD.
     *
     * @param vc       input record
     * @param recalVCs recal records at the same location
     * @param builder  builder for the output record; receives the annotations
     * @return the filter string for the record
     * @throws UserException if there is no matching recal record, or its LOD is missing or unparseable
     */
    private String doSiteSpecificFiltering(VariantContext vc, List<VariantContext> recalVCs, VariantContextBuilder builder) {
        final VariantContext recalVC = getMatchingRecalVC(vc, recalVCs, null);
        if (recalVC == null) {
            throw new UserException("Encountered input variant which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants. First seen at: " + vc);
        }
        final String lodString = recalVC.getAttributeAsString(GATKVCFConstants.VQS_LOD_KEY, null);
        if (lodString == null) {
            throw new UserException("Encountered a malformed record in the input recal file. There is no lod for the record at: " + vc);
        }

        final double lod;
        try {
            lod = Double.parseDouble(lodString);
        } catch (NumberFormatException e) {
            throw new UserException("Encountered a malformed record in the input recal file. The lod is unreadable for the record at: " + vc);
        }

        builder.attribute(GATKVCFConstants.VQS_LOD_KEY, Double.valueOf(lod));
        builder.attribute(GATKVCFConstants.CULPRIT_KEY, recalVC.getAttribute(GATKVCFConstants.CULPRIT_KEY));
        if (recalVC.hasAttribute(GATKVCFConstants.POSITIVE_LABEL_KEY)) {
            builder.attribute(GATKVCFConstants.POSITIVE_LABEL_KEY, true);
        }
        if (recalVC.hasAttribute(GATKVCFConstants.NEGATIVE_LABEL_KEY)) {
            builder.attribute(GATKVCFConstants.NEGATIVE_LABEL_KEY, true);
        }
        return generateFilterString(lod);
    }

    /** @return the initial reduce value, always 1 */
    @Override // org.broadinstitute.gatk.engine.walkers.Walker
    public Integer reduceInit() {
        return 1;
    }

    /** Ignores both arguments. @return always 1 */
    @Override // org.broadinstitute.gatk.engine.walkers.Walker
    public Integer reduce(Integer num, Integer num2) {
        return 1;
    }

    /** Ignores both arguments. @return always 1 */
    @Override // org.broadinstitute.gatk.engine.walkers.TreeReducible
    public Integer treeReduce(Integer num, Integer num2) {
        return 1;
    }

    /** Does nothing. */
    @Override // org.broadinstitute.gatk.engine.walkers.Walker
    public void onTraversalDone(Integer num) {
    }
}
