"""Token-related utilities"""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
from __future__ import annotations
import itertools
import tokenize
from io import StringIO
from keyword import iskeyword
from tokenize import TokenInfo
from typing import Generator, NamedTuple

class Token(NamedTuple):
    token: int
    text: str
    start: int
    end: int
    line: str
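
# Note (added annotation, not in the upstream file): Token mirrors the
# positional layout of tokenize.TokenInfo (type, string, start, end, line),
# which is why Token(*token_info) works in token_at_cursor below.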

def generate_tokens(readline) -> Generator[TokenInfo, None, None]:
    """wrap generate_tokens to catch EOF errors"""
    try:
        yield from tokenize.generate_tokens(readline)
    except tokenize.TokenError:
        # catch EOF error
        return
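
# Illustrative sketch (hypothetical helper, not part of the original module):
# plain tokenize.generate_tokens raises tokenize.TokenError on incomplete
# input, while the wrapper above simply stops yielding.
def _demo_generate_tokens_swallows_eof() -> list[TokenInfo]:
    # "x = (1 +" ends mid-expression, so the tokenizer hits EOF and errors;
    # the wrapper returns the tokens it produced before the error instead
    # of raising.
    return list(generate_tokens(StringIO("x = (1 +").readline))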

def generate_tokens_catch_errors(
    readline, extra_errors_to_catch: list[str] | None = None
):
    default_errors_to_catch = [
        "unterminated string literal",
        "invalid non-printable character",
        "after line continuation character",
    ]
    assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
    errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])

    tokens: list[TokenInfo] = []
    try:
        for token in tokenize.generate_tokens(readline):
            tokens.append(token)
            yield token
    except tokenize.TokenError as exc:
        if any(error in exc.args[0] for error in errors_to_catch):
            if tokens:
                start = tokens[-1].start[0], tokens[-1].end[0]
                end = start
                line = tokens[-1].line
            else:
                start = end = (1, 0)
                line = ""
            yield TokenInfo(tokenize.ERRORTOKEN, "", start, end, line)
        else:
            # Catch EOF
            raise
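
# Illustrative sketch (hypothetical helper, not part of the original module):
# for a caught error such as an unterminated string literal, the generator
# above yields a final ERRORTOKEN instead of raising, so callers can keep a
# best-effort token stream.
def _demo_catch_errors_yields_errortoken() -> list[TokenInfo]:
    tokens = list(generate_tokens_catch_errors(StringIO("x = 'oops").readline))
    # On recent Pythons (where the tokenizer raises TokenError with an
    # "unterminated string literal" message), the last token is expected to
    # be a tokenize.ERRORTOKEN sentinel.
    return tokens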

def line_at_cursor(cell: str, cursor_pos: int = 0) -> tuple[str, int]:
    """Return the line in a cell at a given cursor position

    Used for calling line-based APIs that don't support multi-line input yet.

    Parameters
    ----------
    cell : str
        multiline block of text
    cursor_pos : integer
        the cursor position

    Returns
    -------
    (line, offset): (string, integer)
        The line with the current cursor, and the character offset of the start of the line.
    """
    offset = 0
    lines = cell.splitlines(True)
    for line in lines:
        next_offset = offset + len(line)
        if not line.endswith("\n"):
            # If the last line doesn't have a trailing newline, treat it as if
            # it does so that the cursor at the end of the line still counts
            # as being on that line.
            next_offset += 1
        if next_offset > cursor_pos:
            break
        offset = next_offset
    else:
        line = ""
    return line, offset
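
# Illustrative sketch (hypothetical helper, not part of the original module):
# with the cursor at offset 8 (the "b" in "bar"), line_at_cursor returns the
# second line and the character offset at which that line starts.
def _demo_line_at_cursor() -> tuple[str, int]:
    cell = "foo = 1\nbar = 2\n"
    return line_at_cursor(cell, cursor_pos=8)  # -> ("bar = 2\n", 8)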

def token_at_cursor(cell: str, cursor_pos: int = 0) -> str:
    """Get the token at a given cursor

    Used for introspection.

    Function calls are prioritized, so the token for the callable will be
    returned if the cursor is anywhere inside the call.

    Parameters
    ----------
    cell : str
        A block of Python code
    cursor_pos : int
        The location of the cursor in the block where the token should be found
    """
    names: list[str] = []
    call_names: list[str] = []
    closing_call_name: str | None = None
    most_recent_outer_name: str | None = None
    offsets = {1: 0}  # lines start at 1
    intersects_with_cursor = False
    cur_token_is_name = False
    tokens: list[Token | None] = [
        Token(*tup) for tup in generate_tokens(StringIO(cell).readline)
    ]
    if not tokens:
        return ""
    for prev_tok, (tok, next_tok) in zip(
        [None] + tokens, itertools.pairwise(tokens + [None])
    ):
        # token, text, start, end, line = tup
        start_line, start_col = tok.start
        end_line, end_col = tok.end
        if end_line + 1 not in offsets:
            # keep track of offsets for each line
            lines = tok.line.splitlines(True)
            for lineno, line in enumerate(lines, start_line + 1):
                if lineno not in offsets:
                    offsets[lineno] = offsets[lineno - 1] + len(line)

        closing_call_name = None
        offset = offsets[start_line]
        if offset + start_col > cursor_pos:
            # current token starts after the cursor,
            # don't consume it
            break

        if cur_token_is_name := tok.token == tokenize.NAME and not iskeyword(tok.text):
            if (
                names
                and prev_tok
                and prev_tok.token == tokenize.OP
                and prev_tok.text == "."
            ):
                names[-1] = "%s.%s" % (names[-1], tok.text)
            else:
                names.append(tok.text)
            if (
                next_tok is not None
                and next_tok.token == tokenize.OP
                and next_tok.text == "="
            ):
                # don't inspect the lhs of an assignment
                names.pop(-1)
                cur_token_is_name = False
            if not call_names:
                most_recent_outer_name = names[-1] if names else None
        elif tok.token == tokenize.OP:
            if tok.text == "(" and names:
                # if we are inside a function call, inspect the function
                call_names.append(names[-1])
            elif tok.text == ")" and call_names:
                # keep track of the most recently popped call_name from the stack
                closing_call_name = call_names.pop(-1)

        if offsets[end_line] + end_col > cursor_pos:
            # we found the cursor, stop reading
            # if the current token intersects directly, use it instead of the call token
            intersects_with_cursor = offsets[start_line] + start_col <= cursor_pos
            break

    if cur_token_is_name and intersects_with_cursor:
        return names[-1]
    # if the cursor isn't directly over a name token, use the most recent
    # call name if we can find one
    elif closing_call_name:
        # if we're on a ")", use the most recently popped call name
        return closing_call_name
    elif call_names:
        # otherwise, look for the most recent call name in the stack
        return call_names[-1]
    elif most_recent_outer_name:
        # if we've popped all the call names, use the most recently-seen
        # outer name
        return most_recent_outer_name
    elif names:
        # failing that, use the most recently seen name
        return names[-1]
    else:
        # give up
        return ""