Spaces:
Build error
Build error
"""A bottom-up tree matching algorithm implementation meant to speed | |
up 2to3's matching process. After the tree patterns are reduced to | |
their rarest linear path, a linear Aho-Corasick automaton is | |
created. The linear automaton traverses the linear paths from the | |
leaves to the root of the AST and returns a set of nodes for further | |
matching. This reduces significantly the number of candidate nodes.""" | |
__author__ = "George Boutsioukis <gboutsioukis@gmail.com>" | |
import logging | |
import itertools | |
from collections import defaultdict | |
from . import pytree | |
from .btm_utils import reduce_tree | |
class BMNode(object): | |
"""Class for a node of the Aho-Corasick automaton used in matching""" | |
count = itertools.count() | |
def __init__(self): | |
self.transition_table = {} | |
self.fixers = [] | |
self.id = next(BMNode.count) | |
self.content = '' | |
class BottomMatcher(object): | |
"""The main matcher class. After instantiating the patterns should | |
be added using the add_fixer method""" | |
def __init__(self): | |
self.match = set() | |
self.root = BMNode() | |
self.nodes = [self.root] | |
self.fixers = [] | |
self.logger = logging.getLogger("RefactoringTool") | |
def add_fixer(self, fixer): | |
"""Reduces a fixer's pattern tree to a linear path and adds it | |
to the matcher(a common Aho-Corasick automaton). The fixer is | |
appended on the matching states and called when they are | |
reached""" | |
self.fixers.append(fixer) | |
tree = reduce_tree(fixer.pattern_tree) | |
linear = tree.get_linear_subpattern() | |
match_nodes = self.add(linear, start=self.root) | |
for match_node in match_nodes: | |
match_node.fixers.append(fixer) | |
def add(self, pattern, start): | |
"Recursively adds a linear pattern to the AC automaton" | |
#print("adding pattern", pattern, "to", start) | |
if not pattern: | |
#print("empty pattern") | |
return [start] | |
if isinstance(pattern[0], tuple): | |
#alternatives | |
#print("alternatives") | |
match_nodes = [] | |
for alternative in pattern[0]: | |
#add all alternatives, and add the rest of the pattern | |
#to each end node | |
end_nodes = self.add(alternative, start=start) | |
for end in end_nodes: | |
match_nodes.extend(self.add(pattern[1:], end)) | |
return match_nodes | |
else: | |
#single token | |
#not last | |
if pattern[0] not in start.transition_table: | |
#transition did not exist, create new | |
next_node = BMNode() | |
start.transition_table[pattern[0]] = next_node | |
else: | |
#transition exists already, follow | |
next_node = start.transition_table[pattern[0]] | |
if pattern[1:]: | |
end_nodes = self.add(pattern[1:], start=next_node) | |
else: | |
end_nodes = [next_node] | |
return end_nodes | |
def run(self, leaves): | |
"""The main interface with the bottom matcher. The tree is | |
traversed from the bottom using the constructed | |
automaton. Nodes are only checked once as the tree is | |
retraversed. When the automaton fails, we give it one more | |
shot(in case the above tree matches as a whole with the | |
rejected leaf), then we break for the next leaf. There is the | |
special case of multiple arguments(see code comments) where we | |
recheck the nodes | |
Args: | |
The leaves of the AST tree to be matched | |
Returns: | |
A dictionary of node matches with fixers as the keys | |
""" | |
current_ac_node = self.root | |
results = defaultdict(list) | |
for leaf in leaves: | |
current_ast_node = leaf | |
while current_ast_node: | |
current_ast_node.was_checked = True | |
for child in current_ast_node.children: | |
# multiple statements, recheck | |
if isinstance(child, pytree.Leaf) and child.value == ";": | |
current_ast_node.was_checked = False | |
break | |
if current_ast_node.type == 1: | |
#name | |
node_token = current_ast_node.value | |
else: | |
node_token = current_ast_node.type | |
if node_token in current_ac_node.transition_table: | |
#token matches | |
current_ac_node = current_ac_node.transition_table[node_token] | |
for fixer in current_ac_node.fixers: | |
results[fixer].append(current_ast_node) | |
else: | |
#matching failed, reset automaton | |
current_ac_node = self.root | |
if (current_ast_node.parent is not None | |
and current_ast_node.parent.was_checked): | |
#the rest of the tree upwards has been checked, next leaf | |
break | |
#recheck the rejected node once from the root | |
if node_token in current_ac_node.transition_table: | |
#token matches | |
current_ac_node = current_ac_node.transition_table[node_token] | |
for fixer in current_ac_node.fixers: | |
results[fixer].append(current_ast_node) | |
current_ast_node = current_ast_node.parent | |
return results | |
def print_ac(self): | |
"Prints a graphviz diagram of the BM automaton(for debugging)" | |
print("digraph g{") | |
def print_node(node): | |
for subnode_key in node.transition_table.keys(): | |
subnode = node.transition_table[subnode_key] | |
print("%d -> %d [label=%s] //%s" % | |
(node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers))) | |
if subnode_key == 1: | |
print(subnode.content) | |
print_node(subnode) | |
print_node(self.root) | |
print("}") | |
# taken from pytree.py for debugging; only used by print_ac | |
_type_reprs = {} | |
def type_repr(type_num): | |
global _type_reprs | |
if not _type_reprs: | |
from .pygram import python_symbols | |
# printing tokens is possible but not as useful | |
# from .pgen2 import token // token.__dict__.items(): | |
for name, val in python_symbols.__dict__.items(): | |
if type(val) == int: _type_reprs[val] = name | |
return _type_reprs.setdefault(type_num, type_num) | |