#!/usr/bin/env python
import argparse
import sys

# Command-line interface: the FASTA file to scan, the file listing the k-mers
# to look for, and where the tab-separated hits are written.
parser = argparse.ArgumentParser(
	description="Report positions in FASTA sequences that match a set of k-mers "
	"(or their reverse complements); 'N' in a k-mer matches any base."
)
# Both inputs are mandatory: without them the script has nothing to read, so
# argparse reports the omission instead of a later failure on a None path.
parser.add_argument("--fasta", required=True, help="input fasta file")
parser.add_argument("--kmers", required=True, help="input kmers file")
# Hits go to stdout unless an output path is given.
parser.add_argument("--out", nargs="?", help="output tab file", type=argparse.FileType('w'), default=sys.stdout)
# NOTE(review): -d is parsed but not read anywhere in the visible script.
parser.add_argument('-d', action="store_true", default=False)
args = parser.parse_args()

import glob
import os
import sys
import re
import matplotlib.pyplot as plt
from Bio import SeqIO

def is_fuzzy_match(query:str, candidate:str) -> bool:
	"""Return True if ``candidate`` matches ``query`` position by position.

	An 'N' in ``query`` acts as a wildcard and matches any character in
	``candidate``. The wildcard is one-directional: an 'N' in ``candidate``
	only matches an 'N' in ``query``.

	Strings of different lengths never match. Without this check a shorter
	candidate would raise IndexError and a longer one would be accepted on
	its prefix alone.
	"""
	if len(query) != len(candidate):
		return False
	# Fast path: identical strings need no per-character scan.
	if query == candidate:
		return True
	return all(q == 'N' or q == c for q, c in zip(query, candidate))

# Complement of each base; 'N' pairs with itself. Any other character in a
# k-mer (including lowercase bases) raises KeyError below.
cmap={"A":"T","T":"A","C":"G","G":"C","N":"N"}

# One k-mer per line, surrounding whitespace removed. Blank lines are skipped:
# an empty k-mer would otherwise be treated as matching every window.
with open(args.kmers) as kmers_file:
	to_find = {line.strip() for line in kmers_file if line.strip()}

rcs=set()
# Common k-mer length, taken from the first k-mer seen; stays 0 if the file
# contains no k-mers.
ksize = 0
for kmer in to_find:
	if ksize == 0:
		ksize = len(kmer)
	# Explicit check rather than assert, so it still runs under ``python -O``.
	if len(kmer) != ksize:
		raise ValueError(
			"all k-mers must have the same length: {!r} has length {}, expected {}".format(
				kmer, len(kmer), ksize
			)
		)
	# Reverse complement: complement each base while walking the k-mer backwards.
	rcs.add("".join(cmap[base] for base in reversed(kmer)))

# Search for both orientations of every k-mer.
to_find.update(rcs)
#print(to_find)

# All records are loaded up front, then each one is scanned with a sliding
# window of ksize characters.
recs = list(SeqIO.parse(args.fasta, "fasta"))

# Hits are echoed to stdout in addition to the output file. When the output
# already is stdout (the default), echoing would print every hit twice.
echo_hits = args.out is not sys.stdout

for rec in recs:
	seq = str(rec.seq)
	# The "+ 1" includes the final window, the one ending on the last character
	# of the sequence. Sequences shorter than ksize give an empty range.
	for i in range(len(seq) - ksize + 1):
		kmer = seq[i:i+ksize]
		for query in to_find:
			if is_fuzzy_match(query, kmer):
				# Columns: record id, 0-based start, end (exclusive), and the
				# literal string "100".
				hit = "{}\t{}\t{}\t{}".format(rec.id, i, i+ksize, "100")
				args.out.write(hit + "\n")
				if echo_hits:
					print(hit)
