"""Token-related utilities""" # Copyright (c) IPython Development Team. # Distributed under the terms of the Modified BSD License. from __future__ import annotations import itertools import tokenize from io import StringIO from keyword import iskeyword from tokenize import TokenInfo from typing import Generator, NamedTuple class Token(NamedTuple): token: int text: str start: int end: int line: str def generate_tokens(readline) -> Generator[TokenInfo, None, None]: """wrap generate_tkens to catch EOF errors""" try: yield from tokenize.generate_tokens(readline) except tokenize.TokenError: # catch EOF error return def generate_tokens_catch_errors( readline, extra_errors_to_catch: list[str] | None = None ): default_errors_to_catch = [ "unterminated string literal", "invalid non-printable character", "after line continuation character", ] assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list) errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or []) tokens: list[TokenInfo] = [] try: for token in tokenize.generate_tokens(readline): tokens.append(token) yield token except tokenize.TokenError as exc: if any(error in exc.args[0] for error in errors_to_catch): if tokens: start = tokens[-1].start[0], tokens[-1].end[0] end = start line = tokens[-1].line else: start = end = (1, 0) line = "" yield TokenInfo(tokenize.ERRORTOKEN, "", start, end, line) else: # Catch EOF raise def line_at_cursor(cell: str, cursor_pos: int = 0) -> tuple[str, int]: """Return the line in a cell at a given cursor position Used for calling line-based APIs that don't support multi-line input, yet. Parameters ---------- cell : str multiline block of text cursor_pos : integer the cursor position Returns ------- (line, offset): (string, integer) The line with the current cursor, and the character offset of the start of the line. """ offset = 0 lines = cell.splitlines(True) for line in lines: next_offset = offset + len(line) if not line.endswith("\n"): # If the last line doesn't have a trailing newline, treat it as if # it does so that the cursor at the end of the line still counts # as being on that line. next_offset += 1 if next_offset > cursor_pos: break offset = next_offset else: line = "" return line, offset def token_at_cursor(cell: str, cursor_pos: int = 0) -> str: """Get the token at a given cursor Used for introspection. Function calls are prioritized, so the token for the callable will be returned if the cursor is anywhere inside the call. Parameters ---------- cell : str A block of Python code cursor_pos : int The location of the cursor in the block where the token should be found """ names: list[str] = [] call_names: list[str] = [] closing_call_name: str | None = None most_recent_outer_name: str | None = None offsets = {1: 0} # lines start at 1 intersects_with_cursor = False cur_token_is_name = False tokens: list[Token | None] = [ Token(*tup) for tup in generate_tokens(StringIO(cell).readline) ] if not tokens: return "" for prev_tok, (tok, next_tok) in zip( [None] + tokens, itertools.pairwise(tokens + [None]) ): # token, text, start, end, line = tup start_line, start_col = tok.start end_line, end_col = tok.end if end_line + 1 not in offsets: # keep track of offsets for each line lines = tok.line.splitlines(True) for lineno, line in enumerate(lines, start_line + 1): if lineno not in offsets: offsets[lineno] = offsets[lineno - 1] + len(line) closing_call_name = None offset = offsets[start_line] if offset + start_col > cursor_pos: # current token starts after the cursor, # don't consume it break if cur_token_is_name := tok.token == tokenize.NAME and not iskeyword(tok.text): if ( names and prev_tok and prev_tok.token == tokenize.OP and prev_tok.text == "." ): names[-1] = "%s.%s" % (names[-1], tok.text) else: names.append(tok.text) if ( next_tok is not None and next_tok.token == tokenize.OP and next_tok.text == "=" ): # don't inspect the lhs of an assignment names.pop(-1) cur_token_is_name = False if not call_names: most_recent_outer_name = names[-1] if names else None elif tok.token == tokenize.OP: if tok.text == "(" and names: # if we are inside a function call, inspect the function call_names.append(names[-1]) elif tok.text == ")" and call_names: # keep track of the most recently popped call_name from the stack closing_call_name = call_names.pop(-1) if offsets[end_line] + end_col > cursor_pos: # we found the cursor, stop reading # if the current token intersects directly, use it instead of the call token intersects_with_cursor = offsets[start_line] + start_col <= cursor_pos break if cur_token_is_name and intersects_with_cursor: return names[-1] # if the cursor isn't directly over a name token, use the most recent # call name if we can find one elif closing_call_name: # if we're on a ")", use the most recently popped call name return closing_call_name elif call_names: # otherwise, look for the most recent call name in the stack return call_names[-1] elif most_recent_outer_name: # if we've popped all the call names, use the most recently-seen # outer name return most_recent_outer_name elif names: # failing that, use the most recently seen name return names[-1] else: # give up return ""