Je voulais que la normalisation soit identique à celle de SudachiPy ; j'ai donc extrait uniquement le traitement correspondant.
from sudachipy import dictionary
from sudachipy.utf8inputtextbuilder import UTF8InputTextBuilder
# Build the SudachiPy dictionary once, with default arguments, at import time.
# NOTE: this rebinds the name `dictionary` from the imported module to the
# Dictionary instance, so the module is no longer reachable under that name.
dictionary = dictionary.Dictionary()
# Keep module-level handles on the two pieces that normalize() reads.
input_text_plugins, grammar = (
    dictionary.input_text_plugins,
    dictionary.grammar,
)
def normalize(text):
    """Return ``text`` after every input-text plugin has rewritten it.

    A ``UTF8InputTextBuilder`` is created from ``text`` and the module-level
    ``grammar``; each entry of the module-level ``input_text_plugins`` then
    has its ``rewrite`` method called on that builder, in iteration order.
    The value returned is whatever the builder's ``get_text()`` yields
    afterwards.
    """
    text_builder = UTF8InputTextBuilder(text, grammar)
    for rewriter in input_text_plugins:
        # Each plugin mutates the shared builder in place.
        rewriter.rewrite(text_builder)
    rewritten = text_builder.get_text()
    return rewritten
if __name__ == '__main__':
    # Manual smoke check: print the normalized form of a sample string.
    sample = 'ABC123'
    print(normalize(sample))  # -> abc123
        Recommended Posts