-
Notifications
You must be signed in to change notification settings - Fork 83
/
Copy pathCollectTokenFeatures.py
36 lines (33 loc) · 1.43 KB
/
CollectTokenFeatures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import groomlib
from antlr4 import ParseTreeListener
class CollectTokenFeatures(ParseTreeListener):
    """Collect per-token formatting targets and features during a tree walk.

    For every terminal node visited (EOF excluded) exactly one entry is
    appended to each of four parallel lists, so index ``k`` of every list
    describes the same token:

    * ``inject_newlines`` -- 1 if the token starts on a later line than the
      token just before it in the stream, otherwise 0.
    * ``indent`` -- this token's column minus the column of the previously
      recorded line-starting token; 0 unless the token follows a newline.
    * ``whitespace`` -- columns between the end of the previous token and
      the start of this one; 0 when the token follows a newline.
    * ``features`` -- whatever ``groomlib.node_features`` returns for the
      node (the independent variables).
    """

    # Token type ANTLR assigns to end-of-file (``Token.EOF``).
    _EOF_TOKEN_TYPE = -1

    def __init__(self, stream):
        """Create an empty collector.

        ``stream`` is kept so earlier tokens can be examined; it must expose
        a ``tokens`` sequence that can be indexed by token index.
        """
        self.stream = stream  # track stream so we can examine previous tokens
        self.inject_newlines = []  # prediction
        self.indent = []  # prediction of indent/dedent == column delta
        self.whitespace = []  # prediction of whitespace before token
        self.features = []  # independent vars
        self.first_token_on_line = None  # track to compute indent

    def visitTerminal(self, node):
        """Record newline, indent, whitespace and feature data for ``node``.

        Listener callback for a terminal node. EOF tokens are skipped and
        leave all four lists untouched.
        """
        token = node.symbol
        index = token.tokenIndex
        if token.type == self._EOF_TOKEN_TYPE:
            return

        follows_newline = False
        column_delta = 0
        ws = 0
        if index >= 1:
            previous = self.stream.tokens[index - 1]
            follows_newline = token.line > previous.line
            if follows_newline:
                # Indent is measured against the token that started the
                # previous recorded line, not against the previous token.
                # NOTE(review): tokens on the first line are never stored as
                # first_token_on_line, so the first line break always records
                # an indent of 0 -- confirm this is intended.
                if self.first_token_on_line is not None:
                    column_delta = token.column - self.first_token_on_line.column
                self.first_token_on_line = token
            else:
                # Same line: gap between the previous token's end and our start.
                ws = token.column - (previous.column + len(previous.text))

        # Named ``node_vars`` rather than ``vars`` to avoid shadowing the builtin.
        node_vars = groomlib.node_features(self.stream.tokens, node)
        self.inject_newlines.append(1 if follows_newline else 0)
        self.indent.append(column_delta)
        self.whitespace.append(ws)
        self.features.append(node_vars)