import spacy
from typing import List
import re


class Parser(object):
    """Tokenise a UTF-8 text file line by line with the ``ja_ginza`` pipeline."""

    # Lines matching any of these patterns are blanked out before parsing.
    # Compiled once at class creation rather than on every ``parse`` call.
    _SKIP_PATTERNS = tuple(
        re.compile(pattern_string)
        for pattern_string in ("【住所又は居所】", "【非特許文献.+】", "【特許文献.+】")
    )

    @classmethod
    def _prepare_line(cls, line: str) -> str:
        """Return the text handed to the pipeline for one raw input line.

        Trailing whitespace is stripped. A line that is empty afterwards, or
        that matches one of ``_SKIP_PATTERNS``, is replaced by a single space
        so that it still produces an entry in the output.
        """
        text = line.rstrip()
        if not text or any(pattern.search(text) for pattern in cls._SKIP_PATTERNS):
            return " "
        return text

    # The return annotation is quoted so it is not evaluated at definition time.
    def parse(self, filepath: str) -> "List[List[List[spacy.tokens.Token]]]":
        """Parse every line of ``filepath`` and return its tokens per sentence.

        Args:
            filepath: Path of the text file to read (opened as UTF-8).

        Returns:
            One entry per successfully parsed line. Each entry is a list of
            sentences. For a line with at most one detected sentence the
            entry holds a single plain list of that line's tokens. For a line
            with several sentences each sentence is re-parsed on its own and
            the entry holds the objects yielded by ``nlp.pipe`` (spaCy ``Doc``
            objects, which iterate over their tokens) rather than plain lists.

        Note:
            A line whose parsing raises is reported with a traceback and
            skipped, so the result can be shorter than the file.
        """
        nlp = spacy.load("ja_ginza")

        docs = []
        with open(filepath, "r", encoding="utf-8") as fin:
            for line in fin:
                text = self._prepare_line(line)
                try:
                    docs.append(nlp(text))
                except Exception:
                    # Best effort: report the failing line and carry on.
                    # ``Exception`` (not a bare ``except``) lets
                    # KeyboardInterrupt and SystemExit propagate.
                    import traceback

                    traceback.print_exc()

        astree = []
        for doc in docs:
            # Materialise the sentence spans once; they are needed both for
            # the count and for the iteration below.
            sents = list(doc.sents)
            if len(sents) > 1:
                # Re-run the pipeline on each sentence separately.
                line_tokens = list(nlp.pipe([str(sent) for sent in sents]))
            else:
                # Zero or one sentence: keep the line's tokens as one list.
                line_tokens = [[token for sent in sents for token in sent]]
            astree.append(line_tokens)

        return astree
