From 8b276ff1bc6d1e67c5756800bba02b1ecc6ea3ac Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Wed, 12 Apr 2023 19:59:41 -0700 Subject: [PATCH 1/7] Prototype of a new debugger based on pep 669 --- Lib/bdbx.py | 653 ++++++++++++++++++++++++++++++++++++++++++ Lib/test/test_pdbx.py | 106 +++++++ 2 files changed, 759 insertions(+) create mode 100644 Lib/bdbx.py create mode 100644 Lib/test/test_pdbx.py diff --git a/Lib/bdbx.py b/Lib/bdbx.py new file mode 100644 index 00000000000000..eda6fd3728a88e --- /dev/null +++ b/Lib/bdbx.py @@ -0,0 +1,653 @@ +import cmd +import dis +import linecache +import os +import re +import reprlib +import sys +import tokenize + +from dataclasses import dataclass +from types import FrameType, CodeType +from typing import Callable + + +MEVENT = sys.monitoring.events + + +class BdbxSetBreakpointException(Exception): + pass + + +class Breakpoint: + def __init__(self, file, line_number, code): + self.file = file + self.line_number = line_number + self.code = code + + def belong_to(self, code): + if self.file == code.co_filename and \ + self.line_number >= code.co_firstlineno: + for instr in dis.get_instructions(code): + if instr.positions.lineno == self.line_number: + return True + return False + + +class MonitorGenie: + """ + MonitorGenie is a layer to handle PEP-669 events aka sys.monitoring. + + It saves the trouble for the debugger to handle the monitoring events. + MonitorGenie takes file and function breakpoints, and an action to start + the monitoring. 
The accepted actions are: + "step" + "next" + "return" + "continue" + """ + def __init__( + self, + tool_id: int, + debugger_entry: Callable[[FrameType, Breakpoint | None, int, dict | None], None] + ): + self._action = None + self._frame = None + self._tool_id = tool_id + self._tasks = [] + self._bound_breakpoints: dict[CodeType, list[Breakpoint]] = {} + self._free_breakpoints: list[Breakpoint] = [] + self._debugger_entry = debugger_entry + self._code_with_events = set() + sys.monitoring.use_tool_id(tool_id, "MonitorGenie") + self._register_callbacks() + + # ======================================================================== + # ============================= Public API =============================== + # ======================================================================== + + def start_monitor(self, action: str, frame: FrameType): + """starts monitoring with the given action and frame""" + self._clear_monitor() + self._try_bind_breakpoints(frame.f_code) + self._set_events_for_breakpoints() + self._action, self._frame = action, frame + if action == "step": + self._add_local_events(frame.f_code, MEVENT.LINE | MEVENT.CALL | MEVENT.PY_RETURN \ + | MEVENT.PY_YIELD) + elif action == "next": + self._add_local_events(frame.f_code, MEVENT.LINE | MEVENT.PY_RETURN | MEVENT.PY_YIELD) + if frame.f_back: + self._add_local_events(frame.f_back.f_code, MEVENT.LINE | MEVENT.PY_RETURN | MEVENT.PY_YIELD) + elif action == "return": + if frame.f_back: + self._add_local_events(frame.f_back.f_code, MEVENT.LINE | MEVENT.PY_RETURN | MEVENT.PY_YIELD) + elif action == "continue": + pass + sys.monitoring.restart_events() + + def add_breakpoint(self, breakpoint: Breakpoint): + if breakpoint.code is None: + self._free_breakpoints.append(breakpoint) + else: + if breakpoint.code not in self._bound_breakpoints: + self._bound_breakpoints[breakpoint.code] = [] + self._bound_breakpoints[breakpoint.code].append(breakpoint) + + def remove_breakpoint(self, breakpoint: Breakpoint): + if 
breakpoint.code is None: + self._free_breakpoints.remove(breakpoint) + else: + self._bound_breakpoints[breakpoint.code].remove(breakpoint) + + # ======================================================================== + # ============================ Private API =============================== + # ======================================================================== + + def _clear_monitor(self): + sys.monitoring.set_events(self._tool_id, 0) + for code in self._code_with_events: + sys.monitoring.set_local_events(self._tool_id, code, 0) + + def _add_global_events(self, events): + curr_events = sys.monitoring.get_events(self._tool_id) + sys.monitoring.set_events(self._tool_id, curr_events | events) + + def _add_local_events(self, code, events): + curr_events = sys.monitoring.get_local_events(self._tool_id, code) + self._code_with_events.add(code) + sys.monitoring.set_local_events(self._tool_id, code, curr_events | events) + + def _set_events_for_breakpoints(self): + if self._free_breakpoints: + self._add_global_events(MEVENT.PY_START) + for code, bp_list in self._bound_breakpoints.items(): + for breakpoint in bp_list: + if breakpoint.line_number is not None: + self._add_local_events(code, MEVENT.LINE) + else: + self._add_local_events(code, MEVENT.PY_START) + + def _try_bind_breakpoints(self, code): + # copy the breakpoints so we can remove bp from it + bp_dirty = False + for bp in self._free_breakpoints[:]: + if bp.belong_to(code): + self.remove_breakpoint(bp) + bp.code = code + self.add_breakpoint(bp) + bp_dirty = True + break + if bp_dirty: + self._set_events_for_breakpoints() + if not self._free_breakpoints: + sys.monitoring.set_events(self._tool_id, 0) + + def _stophere(self, code): + if self._action == "step": + return True + elif self._action == "next": + return code == self._frame.f_code or code == self._frame.f_back.f_code + elif self._action == "return": + return code == self._frame.f_back.f_code + return False + + def _breakhere(self, code, line_number): 
+ if code in self._bound_breakpoints: + for bp in self._bound_breakpoints[code]: + # There are two possible cases + # the line_number could be a real line number and match + # or the line_number is None which only be given by PY_START + # and will match on function breakpoints + if bp.line_number == line_number: + return bp + return None + + # Callbacks for the real sys.monitoring + + def _register_callbacks(self): + sys.monitoring.register_callback(self._tool_id, MEVENT.LINE, self._line_callback) + sys.monitoring.register_callback(self._tool_id, MEVENT.CALL, self._call_callback) + sys.monitoring.register_callback(self._tool_id, MEVENT.PY_START, self._start_callback) + sys.monitoring.register_callback(self._tool_id, MEVENT.PY_RETURN, self._return_callback) + + def _line_callback(self, code, line_number): + if bp := self._breakhere(code, line_number): + self._start_debugger(sys._getframe().f_back, bp, MEVENT.LINE, + {"code": code, "line_number": line_number}) + elif self._stophere(code): + self._start_debugger(sys._getframe().f_back, None, MEVENT.LINE, + {"code": code, "line_number": line_number}) + else: + return sys.monitoring.DISABLE + + def _call_callback(self, code, instruction_offset, callable, arg0): + # The only possible trigget for this is "step" action + # If the callable is instrumentable, do it, otherwise ignore it + code = None + if hasattr(callable, "__code__"): + code = callable.__code__ + elif hasattr(callable, "__call__"): + try: + code = callable.__call__.__func__.__code__ + except AttributeError: + pass + if code is not None: + self._add_local_events(code, MEVENT.LINE) + + def _start_callback(self, code, instruction_offset): + self._try_bind_breakpoints(code) + if bp := self._breakhere(code, None): + self._start_debugger(sys._getframe().f_back, bp, MEVENT.PY_START, + {"code": code, "instruction_offset": instruction_offset}) + elif self._stophere(code): + self._start_debugger(sys._getframe().f_back, None, MEVENT.PY_START, + {"code": code, 
"instruction_offset": instruction_offset}) + else: + return sys.monitoring.DISABLE + + def _return_callback(self, code, instruction_offset, retval): + if self._stophere(code): + self._start_debugger(sys._getframe().f_back, None, MEVENT.PY_RETURN, + {"code": code, "instruction_offset": instruction_offset, "retval": retval}) + else: + return sys.monitoring.DISABLE + + def _start_debugger(self, frame, breakpoint, event, args): + self._debugger_entry(frame, breakpoint, event, args) + + +@dataclass +class StopEvent: + frame: FrameType + line_number: int + is_call: bool = False + is_return: bool = False + + +class Bdbx: + """Bdbx is a singleton class that implements the debugger logic""" + _instance = None + + def __new__(cls): + if Bdbx._instance is None: + instance = super().__new__(cls) + instance._tool_id = sys.monitoring.DEBUGGER_ID + instance._monitor_genie = MonitorGenie(instance._tool_id, instance.monitor_callback) + Bdbx._instance = instance + return Bdbx._instance + + def __init__(self): + self._next_action = None + self._next_action_frame = None + self._stop_event = None + self._stop_frame = None + self._curr_frame = None + self._main_pyfile = '' + self.clear_breakpoints() + + # ======================================================================== + # ============================= Public API =============================== + # ======================================================================== + + def break_here(self, frame=None): + """break into the debugger as soon as possible""" + if frame is None: + frame = sys._getframe().f_back + self.set_action("next", frame) + self._monitor_genie.start_monitor(self._next_action, self._next_action_frame) + + def set_action(self, action, frame=None): + """Set the next action, if frame is None, use the current frame""" + if frame is None: + frame = self._curr_frame + + self._next_action = action + self._next_action_frame = frame + + def set_function_breakpoint(self, func): + if not hasattr(func, "__code__"): + 
raise BdbxSetBreakpointException(f"{func} is not a valid function!") + abspath = os.path.abspath(func.__code__.co_filename) + if not abspath: + raise BdbxSetBreakpointException(f"Cann't find the source file for {func}!") + # Setting line_number to None for function breakpoints + bp = Breakpoint(abspath, None, func.__code__) + self._breakpoints.append(bp) + self._monitor_genie.add_breakpoint(bp) + + def set_file_breakpoint(self, filename, line_number): + abspath = self._lookupmodule(filename) + if not abspath: + abspath = filename + #raise BdbxSetBreakpointException(f"{filename} is not a valid file name!") + try: + line_number = int(line_number) + except ValueError: + raise BdbxSetBreakpointException(f"{line_number} is not a valid line number!") + bp = Breakpoint(abspath, line_number, None) + self._breakpoints.append(bp) + self._monitor_genie.add_breakpoint(bp) + + def clear_breakpoints(self): + if hasattr(self, "_breakpoints"): + for bp in self._breakpoints: + self._monitor_genie.remove_breakpoint(bp) + self._breakpoints = [] + + # Data accessors + + def get_current_frame(self): + """Get the current frame""" + return self._curr_frame + + def get_stack(self): + """Get the current stack""" + return self._stack + + def get_breakpoints(self): + """Get all the breakpoints""" + return self._breakpoints + + # Interface to be implemented by the debugger + + def dispatch_event(self, event: StopEvent): + pass + + # communication with MonitorGenie + + def monitor_callback(self, frame, breakpoint, event, event_arg): + """Callback entry from MonitorGenie""" + + self._curr_breakpoint = breakpoint + self._stop_frame = frame + self._curr_frame = frame + self._stack = self._get_stack_from_frame(frame) + + if event == MEVENT.LINE: + self._stop_event = StopEvent(frame, event_arg["line_number"]) + elif event == MEVENT.PY_START or event == MEVENT.PY_RESUME: + self._stop_event = StopEvent(frame, 0, is_call=True) + elif event == MEVENT.PY_RETURN or event == MEVENT.PY_YIELD: + 
self._stop_event = StopEvent(frame, 0, is_return=True) + else: + raise RuntimeError("Not supposed to be here") + + self.dispatch_event(self._stop_event) + + # After the dispatch returns, reset the monitor + self._monitor_genie.start_monitor(self._next_action, self._next_action_frame) + + # ======================================================================== + # ======================= Helper functions =============================== + # ======================================================================== + + def _get_stack_from_frame(self, frame): + """Get call stack from the latest frame, oldest frame at [0]""" + stack = [] + while frame: + stack.append(frame) + frame = frame.f_back + return reversed(stack) + + def _canonic(self, filename): + """Return canonical form of filename. + + For real filenames, the canonical form is a case-normalized (on + case insensitive filesystems) absolute path. 'Filenames' with + angle brackets, such as "", generated in interactive + mode, are returned unchanged. + """ + if filename == "<" + filename[1:-1] + ">": + return filename + canonic = self.fncache.get(filename) + if not canonic: + canonic = os.path.abspath(filename) + canonic = os.path.normcase(canonic) + self.fncache[filename] = canonic + return canonic + + def _lookupmodule(self, filename): + """Helper function for break/clear parsing -- may be overridden. + + lookupmodule() translates (possibly incomplete) file or module name + into an absolute file name. 
+ """ + if os.path.isabs(filename) and os.path.exists(filename): + return filename + f = os.path.join(sys.path[0], filename) + if os.path.exists(f) and self._canonic(f) == self._main_pyfile: + return f + root, ext = os.path.splitext(filename) + if ext == '': + filename = filename + '.py' + if os.path.isabs(filename): + return filename + for dirname in sys.path: + while os.path.islink(dirname): + dirname = os.readlink(dirname) + fullname = os.path.join(dirname, filename) + if os.path.exists(fullname): + return fullname + return None + + +class Pdbx(Bdbx, cmd.Cmd): + def __init__(self): + self._event = None + self.prompt = "(Pdbx) " + self.fncache = {} + Bdbx.__init__(self) + cmd.Cmd.__init__(self, 'tab', None, None) + + # ======================================================================== + # ======================= Interface to Bdbx ============================== + # ======================================================================== + + def dispatch_event(self, event): + self._event = event + self.print_header() + self.cmdloop() + + # ======================================================================== + # ================= Methods that can be overwritten ====================== + # ======================================================================== + + def error(self, msg): + print(msg) + + def message(self, msg): + print(msg) + + def print_header(self): + if self._event.is_call: + print("----call----") + elif self._event.is_return: + print("----return----") + if self._event.line_number: + lineno = self._event.line_number + else: + lineno = self._event.frame.f_lineno + self._print_stack_entry(self._event.frame, lineno) + + # ======================================================================== + # ======================== helper functions ============================== + # ======================================================================== + + @property + def _default_file(self): + """Produce a reasonable default.""" + filename = 
self.get_current_frame().f_code.co_filename + if filename == '' and self._main_pyfile: + filename = self._main_pyfile + return filename + + def _format_stack_entry(self, frame, lineno, lprefix=': '): + """Return a string with information about a stack entry. + + The stack entry frame_lineno is a (frame, lineno) tuple. The + return string contains the canonical filename, the function name + or '', the input arguments, the return value, and the + line of code (if it exists). + + """ + filename = self._canonic(frame.f_code.co_filename) + s = '%s(%r)' % (filename, lineno) + if frame.f_code.co_name: + s += frame.f_code.co_name + else: + s += "" + s += '()' + if '__return__' in frame.f_locals: + rv = frame.f_locals['__return__'] + s += '->' + s += reprlib.repr(rv) + line = linecache.getline(filename, lineno, frame.f_globals) + if line: + s += lprefix + line.strip() + return s + + def _print_stack_entry(self, frame, lineno): + if frame is self.get_current_frame(): + prefix = '> ' + else: + prefix = ' ' + self.message(prefix + + self._format_stack_entry(frame, lineno, '\n-> ')) + + def _checkline(self, filename, lineno): + """Check whether specified line seems to be executable. + + Return `lineno` if it is, 0 if not (e.g. a docstring, comment, blank + line or EOF). Warning: testing is not comprehensive. 
+ """ + # this method should be callable before starting debugging, so default + # to "no globals" if there is no current frame + globs = self.get_current_frame().f_globals if self.get_current_frame() else None + line = linecache.getline(filename, lineno, globs) + if not line: + self.message('End of file') + return 0 + line = line.strip() + # Don't allow setting breakpoint at a blank line + if (not line or (line[0] == '#') or + (line[:3] == '"""') or line[:3] == "'''"): + self.error('Blank or comment') + return 0 + return lineno + + def _lineinfo(self, identifier): + failed = (None, None, None) + # Input is identifier, may be in single quotes + idstring = identifier.split("'") + if len(idstring) == 1: + # not in single quotes + id = idstring[0].strip() + elif len(idstring) == 3: + # quoted + id = idstring[1].strip() + else: + return failed + if id == '': return failed + parts = id.split('.') + # Protection for derived debuggers + if parts[0] == 'self': + del parts[0] + if len(parts) == 0: + return failed + # Best first guess at file to look at + fname = self.defaultFile() + if len(parts) == 1: + item = parts[0] + else: + # More than one part. 
+ # First is module, second is method/class + f = self._lookupmodule(parts[0]) + if f: + fname = f + item = parts[1] + answer = self._find_function(item, fname) + return answer or failed + + def _find_function(funcname, filename): + cre = re.compile(r'def\s+%s\s*[(]' % re.escape(funcname)) + try: + fp = tokenize.open(filename) + except OSError: + return None + # consumer of this info expects the first line to be 1 + with fp: + for lineno, line in enumerate(fp, start=1): + if cre.match(line): + return funcname, filename, lineno + return None + + # ====================================================================== + # The following methods are called by the cmd.Cmd base class + # All the commands are in alphabetic order + # ====================================================================== + + def do_break(self, arg): + if not arg: + print(self.get_breakpoints()) + return False + # parse arguments; comma has lowest precedence + # and cannot occur in filename + filename = None + lineno = None + # parse stuff before comma: [filename:]lineno | function + colon = arg.rfind(':') + if colon >= 0: + filename = arg[:colon].rstrip() + line_number = arg[colon+1:] + try: + self.set_file_breakpoint(filename, line_number) + except BdbxSetBreakpointException as e: + self.error(e) + return False + else: + # no colon; can be lineno or function + try: + lineno = int(arg) + except ValueError: + try: + frame = self.get_current_frame() + func = eval(arg, + frame.f_globals, + frame.f_locals) + except: + func = arg + (ok, filename, line_number) = self._lineinfo(arg) + if not ok: + self.error(f"Can't find function '{arg}'") + return False + try: + self.set_file_breakpoint(filename, line_number) + except BdbxSetBreakpointException as e: + self.error(e) + return False + try: + self.set_function_breakpoint(func) + except BdbxSetBreakpointException as e: + self.error(e) + return False + return False + if not filename: + filename = self._default_file + # Check for reasonable breakpoint 
+ line = self._checkline(filename, lineno) + if line: + # now set the break point + self.set_file_breakpoint(filename, line) + return False + + do_b = do_break + + def do_clear(self, arg): + self.clear_breakpoints() + return False + + def do_continue(self, arg): + self.set_action("continue") + return True + + do_c = do_continue + + def do_next(self, arg): + self.set_action("next") + return True + + do_n = do_next + + def do_quit(self, arg): + raise Exception("quit") + + do_q = do_quit + + def do_return(self, arg): + self.set_action("return") + return True + + do_r = do_return + + def do_step(self, arg): + self.set_action("step") + return True + + do_s = do_step + + def do_where(self, arg): + try: + for frame in self.get_stack(): + lineno = frame.f_lineno + self._print_stack_entry(frame, lineno) + except KeyboardInterrupt: + pass + return False + + do_w = do_where + + +def break_here(): + pdb = Pdbx() + pdb.break_here(sys._getframe().f_back) diff --git a/Lib/test/test_pdbx.py b/Lib/test/test_pdbx.py new file mode 100644 index 00000000000000..57b89a39e31f43 --- /dev/null +++ b/Lib/test/test_pdbx.py @@ -0,0 +1,106 @@ +from bdbx import Pdbx +import doctest +import sys +from test.test_doctest import _FakeInput + +class PdbxTestInput(object): + """Context manager that makes testing Pdb in doctests easier.""" + + def __init__(self, input): + self.input = input + + def __enter__(self): + self.real_stdin = sys.stdin + sys.stdin = _FakeInput(self.input) + + def __exit__(self, *exc): + sys.stdin = self.real_stdin + +def test_pdbx_basic_commands(): + """Test the basic commands of pdb. + + >>> def f(x): + ... x = x + 1 + ... return x + + >>> def test_function(): + ... import bdbx; bdbx.break_here() + ... for i in range(5): + ... n = f(i) + ... pass + + >>> with PdbxTestInput([ # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE + ... 'step', + ... 'step', + ... 'return', + ... 'next', + ... 'n', + ... 'continue', + ... ]): + ... 
test_function() + > (3)test_function() + -> for i in range(5): + (Pdbx) step + > (4)test_function() + -> n = f(i) + (Pdbx) step + > (2)f() + -> x = x + 1 + (Pdbx) return + > (5)test_function() + -> pass + (Pdbx) next + > (4)test_function() + -> n = f(i) + (Pdbx) n + > (5)test_function() + -> pass + (Pdbx) continue + """ + +def test_pdbx_basic_breakpoint(): + """Test the breakpoints of pdbx. + + >>> def f(x): + ... x = x + 1 + ... return x + + >>> def test_function(): + ... import bdbx; bdbx.break_here() + ... for i in range(5): + ... n = f(i) + ... pass + ... a = 3 + + >>> with PdbxTestInput([ # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE + ... 'break f', + ... 'continue', + ... 'clear', + ... 'return', + ... 'break 6', + ... 'continue', + ... 'continue', + ... ]): + ... test_function() + > (3)test_function() + -> for i in range(5): + (Pdbx) break f + (Pdbx) continue + ----call---- + > (1)f() + -> def f(x): + (Pdbx) clear + (Pdbx) return + > (5)test_function() + -> pass + (Pdbx) break 6 + (Pdbx) continue + > (6)test_function() + -> a = 3 + (Pdbx) continue + """ + +def load_tests(loader, tests, pattern): + from test import test_pdbx + tests.addTest(doctest.DocTestSuite(test_pdbx)) + return tests From 73282434ab48e71b0fbf02fe3ad56fc439b6b5fb Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Wed, 19 Apr 2023 17:08:18 -0700 Subject: [PATCH 2/7] Add more features to bdbx Fixed step issue --- Lib/bdbx.py | 368 ++++++++++++++++++++++++------------------ Lib/test/test_pdbx.py | 104 +++++++++++- 2 files changed, 312 insertions(+), 160 deletions(-) diff --git a/Lib/bdbx.py b/Lib/bdbx.py index eda6fd3728a88e..1236d3ea3fc7fe 100644 --- a/Lib/bdbx.py +++ b/Lib/bdbx.py @@ -19,13 +19,21 @@ class BdbxSetBreakpointException(Exception): pass +class BdbxQuit(Exception): + pass + + class Breakpoint: - def __init__(self, file, line_number, code): + _next_id = 1 + + def __init__(self, file: str, line_number: int, code: CodeType | None = None): + self._id = self._get_next_id() self.file = file 
self.line_number = line_number self.code = code - def belong_to(self, code): + def belong_to(self, code: CodeType): + """returns True if the breakpoint belongs to the given code object""" if self.file == code.co_filename and \ self.line_number >= code.co_firstlineno: for instr in dis.get_instructions(code): @@ -33,6 +41,14 @@ def belong_to(self, code): return True return False + def __str__(self): + return f"Breakpoint {self._id} at {self.file}:{self.line_number}" + + @classmethod + def _get_next_id(cls): + cls._next_id += 1 + return cls._next_id - 1 + class MonitorGenie: """ @@ -54,6 +70,7 @@ def __init__( self._action = None self._frame = None self._tool_id = tool_id + self._returning = False self._tasks = [] self._bound_breakpoints: dict[CodeType, list[Breakpoint]] = {} self._free_breakpoints: list[Breakpoint] = [] @@ -73,20 +90,28 @@ def start_monitor(self, action: str, frame: FrameType): self._set_events_for_breakpoints() self._action, self._frame = action, frame if action == "step": - self._add_local_events(frame.f_code, MEVENT.LINE | MEVENT.CALL | MEVENT.PY_RETURN \ - | MEVENT.PY_YIELD) + if not self._returning: + self._add_local_events(frame.f_code, MEVENT.LINE | MEVENT.CALL | MEVENT.PY_RETURN \ + | MEVENT.PY_YIELD) + elif frame.f_back: + self._add_local_events(frame.f_back.f_code, MEVENT.LINE | MEVENT.CALL | MEVENT.PY_RETURN \ + | MEVENT.PY_YIELD) elif action == "next": - self._add_local_events(frame.f_code, MEVENT.LINE | MEVENT.PY_RETURN | MEVENT.PY_YIELD) - if frame.f_back: + if not self._returning: + self._add_local_events(frame.f_code, MEVENT.LINE | MEVENT.PY_RETURN | MEVENT.PY_YIELD) + elif frame.f_back: self._add_local_events(frame.f_back.f_code, MEVENT.LINE | MEVENT.PY_RETURN | MEVENT.PY_YIELD) elif action == "return": if frame.f_back: self._add_local_events(frame.f_back.f_code, MEVENT.LINE | MEVENT.PY_RETURN | MEVENT.PY_YIELD) elif action == "continue": pass + + self._returning = False sys.monitoring.restart_events() def add_breakpoint(self, 
breakpoint: Breakpoint): + """adds a breakpoint to the list of breakpoints""" if breakpoint.code is None: self._free_breakpoints.append(breakpoint) else: @@ -95,6 +120,7 @@ def add_breakpoint(self, breakpoint: Breakpoint): self._bound_breakpoints[breakpoint.code].append(breakpoint) def remove_breakpoint(self, breakpoint: Breakpoint): + """removes a breakpoint from the list of breakpoints""" if breakpoint.code is None: self._free_breakpoints.remove(breakpoint) else: @@ -169,6 +195,7 @@ def _register_callbacks(self): sys.monitoring.register_callback(self._tool_id, MEVENT.LINE, self._line_callback) sys.monitoring.register_callback(self._tool_id, MEVENT.CALL, self._call_callback) sys.monitoring.register_callback(self._tool_id, MEVENT.PY_START, self._start_callback) + sys.monitoring.register_callback(self._tool_id, MEVENT.PY_YIELD, self._return_callback) sys.monitoring.register_callback(self._tool_id, MEVENT.PY_RETURN, self._return_callback) def _line_callback(self, code, line_number): @@ -208,6 +235,7 @@ def _start_callback(self, code, instruction_offset): def _return_callback(self, code, instruction_offset, retval): if self._stophere(code): + self._returning = True self._start_debugger(sys._getframe().f_back, None, MEVENT.PY_RETURN, {"code": code, "instruction_offset": instruction_offset, "retval": retval}) else: @@ -277,15 +305,12 @@ def set_function_breakpoint(self, func): self._monitor_genie.add_breakpoint(bp) def set_file_breakpoint(self, filename, line_number): - abspath = self._lookupmodule(filename) - if not abspath: - abspath = filename - #raise BdbxSetBreakpointException(f"{filename} is not a valid file name!") - try: - line_number = int(line_number) - except ValueError: - raise BdbxSetBreakpointException(f"{line_number} is not a valid line number!") - bp = Breakpoint(abspath, line_number, None) + """Set a breakpoint at the given line number in the given file + + The caller is responsible for checking that the file exists and + that the line number is valid. 
+ """ + bp = Breakpoint(filename, line_number, None) self._breakpoints.append(bp) self._monitor_genie.add_breakpoint(bp) @@ -295,12 +320,27 @@ def clear_breakpoints(self): self._monitor_genie.remove_breakpoint(bp) self._breakpoints = [] + def select_frame(self, index, offset=False): + """Select a frame in the stack""" + if offset: + index += self._curr_frame_idx + if index < 0: + index = 0 + elif index >= len(self._stack): + index = len(self._stack) - 1 + self._curr_frame_idx = index + self._curr_frame = self._stack[index] + # Data accessors def get_current_frame(self): """Get the current frame""" return self._curr_frame + def get_current_frame_idx(self): + """Get the current frame index""" + return self._curr_frame_idx + def get_stack(self): """Get the current stack""" return self._stack @@ -323,6 +363,7 @@ def monitor_callback(self, frame, breakpoint, event, event_arg): self._stop_frame = frame self._curr_frame = frame self._stack = self._get_stack_from_frame(frame) + self._curr_frame_idx = len(self._stack) - 1 if event == MEVENT.LINE: self._stop_event = StopEvent(frame, event_arg["line_number"]) @@ -348,48 +389,8 @@ def _get_stack_from_frame(self, frame): while frame: stack.append(frame) frame = frame.f_back - return reversed(stack) - - def _canonic(self, filename): - """Return canonical form of filename. - - For real filenames, the canonical form is a case-normalized (on - case insensitive filesystems) absolute path. 'Filenames' with - angle brackets, such as "", generated in interactive - mode, are returned unchanged. - """ - if filename == "<" + filename[1:-1] + ">": - return filename - canonic = self.fncache.get(filename) - if not canonic: - canonic = os.path.abspath(filename) - canonic = os.path.normcase(canonic) - self.fncache[filename] = canonic - return canonic - - def _lookupmodule(self, filename): - """Helper function for break/clear parsing -- may be overridden. 
- - lookupmodule() translates (possibly incomplete) file or module name - into an absolute file name. - """ - if os.path.isabs(filename) and os.path.exists(filename): - return filename - f = os.path.join(sys.path[0], filename) - if os.path.exists(f) and self._canonic(f) == self._main_pyfile: - return f - root, ext = os.path.splitext(filename) - if ext == '': - filename = filename + '.py' - if os.path.isabs(filename): - return filename - for dirname in sys.path: - while os.path.islink(dirname): - dirname = os.readlink(dirname) - fullname = os.path.join(dirname, filename) - if os.path.exists(fullname): - return fullname - return None + stack.reverse() + return stack class Pdbx(Bdbx, cmd.Cmd): @@ -467,15 +468,58 @@ def _format_stack_entry(self, frame, lineno, lprefix=': '): s += lprefix + line.strip() return s - def _print_stack_entry(self, frame, lineno): + def _print_stack_entry(self, frame, line_number=None): if frame is self.get_current_frame(): prefix = '> ' else: prefix = ' ' + if line_number is None: + line_number = frame.f_lineno self.message(prefix + - self._format_stack_entry(frame, lineno, '\n-> ')) + self._format_stack_entry(frame, line_number, '\n-> ')) + + def _canonic(self, filename): + """Return canonical form of filename. + + For real filenames, the canonical form is a case-normalized (on + case insensitive filesystems) absolute path. 'Filenames' with + angle brackets, such as "", generated in interactive + mode, are returned unchanged. + """ + if filename == "<" + filename[1:-1] + ">": + return filename + canonic = self.fncache.get(filename) + if not canonic: + canonic = os.path.abspath(filename) + canonic = os.path.normcase(canonic) + self.fncache[filename] = canonic + return canonic + + def _lookupmodule(self, filename): + """Helper function for break/clear parsing -- may be overridden. - def _checkline(self, filename, lineno): + lookupmodule() translates (possibly incomplete) file or module name + into an absolute file name. 
+ """ + if os.path.isabs(filename) and os.path.exists(filename): + return filename + f = os.path.join(sys.path[0], filename) + if os.path.exists(f) and self._canonic(f) == self._main_pyfile: + return f + root, ext = os.path.splitext(filename) + if ext == '': + filename = filename + '.py' + if os.path.isabs(filename): + return filename + for dirname in sys.path: + while os.path.islink(dirname): + dirname = os.readlink(dirname) + fullname = os.path.join(dirname, filename) + if os.path.exists(fullname): + return fullname + return None + + def _confirm_line_executable(self, filename, line_number): """Check whether specified line seems to be executable. Return `lineno` if it is, 0 if not (e.g. a docstring, comment, blank @@ -484,121 +528,90 @@ def _checkline(self, filename, lineno): # this method should be callable before starting debugging, so default # to "no globals" if there is no current frame globs = self.get_current_frame().f_globals if self.get_current_frame() else None - line = linecache.getline(filename, lineno, globs) + line = linecache.getline(filename, line_number, globs) if not line: - self.message('End of file') - return 0 + raise ValueError(f"Can't find line {line_number} in file {filename}") line = line.strip() # Don't allow setting breakpoint at a blank line if (not line or (line[0] == '#') or (line[:3] == '"""') or line[:3] == "'''"): - self.error('Blank or comment') - return 0 - return lineno - - def _lineinfo(self, identifier): - failed = (None, None, None) - # Input is identifier, may be in single quotes - idstring = identifier.split("'") - if len(idstring) == 1: - # not in single quotes - id = idstring[0].strip() - elif len(idstring) == 3: - # quoted - id = idstring[1].strip() - else: - return failed - if id == '': return failed - parts = id.split('.') - # Protection for derived debuggers - if parts[0] == 'self': - del parts[0] - if len(parts) == 0: - return failed - # Best first guess at file to look at - fname = self.defaultFile() - if 
len(parts) == 1: - item = parts[0] - else: - # More than one part. - # First is module, second is method/class - f = self._lookupmodule(parts[0]) - if f: - fname = f - item = parts[1] - answer = self._find_function(item, fname) - return answer or failed - - def _find_function(funcname, filename): - cre = re.compile(r'def\s+%s\s*[(]' % re.escape(funcname)) - try: - fp = tokenize.open(filename) - except OSError: - return None - # consumer of this info expects the first line to be 1 - with fp: - for lineno, line in enumerate(fp, start=1): - if cre.match(line): - return funcname, filename, lineno - return None + raise ValueError(f"Can't set breakpoint at line {line_number} in file {filename}, it's blank or a comment") - # ====================================================================== - # The following methods are called by the cmd.Cmd base class - # All the commands are in alphabetic order - # ====================================================================== + def _parse_breakpoint_arg(self, arg): + """Parse a breakpoint argument. 
- def do_break(self, arg): - if not arg: - print(self.get_breakpoints()) - return False - # parse arguments; comma has lowest precedence - # and cannot occur in filename + Return a tuple (filename, lineno, function, condition) + from [filename:]lineno | function, condition + """ filename = None - lineno = None + line_number = None + function = None + condition = None + # parse stuff after comma: condition + comma = arg.find(',') + if comma >= 0: + condition = arg[comma+1:].lstrip() + arg = arg[:comma] + # parse stuff before comma: [filename:]lineno | function colon = arg.rfind(':') if colon >= 0: filename = arg[:colon].rstrip() + filename = self._lookupmodule(filename) + if filename is None: + raise ValueError(f"Invalid filename: {filename}") line_number = arg[colon+1:] try: - self.set_file_breakpoint(filename, line_number) - except BdbxSetBreakpointException as e: - self.error(e) - return False + line_number = int(line_number) + except ValueError: + raise ValueError(f"Invalid line number: {line_number}") else: # no colon; can be lineno or function try: - lineno = int(arg) - except ValueError: - try: - frame = self.get_current_frame() - func = eval(arg, + # Maybe it's a function? 
+ frame = self.get_current_frame() + function = eval(arg, frame.f_globals, frame.f_locals) - except: - func = arg - (ok, filename, line_number) = self._lineinfo(arg) - if not ok: - self.error(f"Can't find function '{arg}'") - return False - try: - self.set_file_breakpoint(filename, line_number) - except BdbxSetBreakpointException as e: - self.error(e) - return False + if hasattr(function, "__func__"): + # It's a method + function = function.__func__ + code = function.__code__ + filename = code.co_filename + line_number = code.co_firstlineno + except: + # Then it has to be a line number + function = None try: - self.set_function_breakpoint(func) - except BdbxSetBreakpointException as e: - self.error(e) - return False - return False - if not filename: - filename = self._default_file - # Check for reasonable breakpoint - line = self._checkline(filename, lineno) - if line: - # now set the break point - self.set_file_breakpoint(filename, line) + line_number = int(arg) + except ValueError: + raise ValueError(f"Invalid breakpoint argument: {arg}") + filename = self._default_file + # Before returning, check that line on the file is executable + self._confirm_line_executable(filename, line_number) + + return filename, line_number, function, condition + + # ====================================================================== + # The following methods are called by the cmd.Cmd base class + # All the commands are in alphabetic order + # ====================================================================== + + def do_break(self, arg): + if not arg: + for bp in self.get_breakpoints(): + print(bp) + return False + try: + filename, line_number, function, condition = self._parse_breakpoint_arg(arg) + except ValueError as exc: + self.error(str(exc)) + return False + + if function: + self.set_function_breakpoint(function) + else: + self.set_file_breakpoint(filename, line_number) return False do_b = do_break @@ -613,6 +626,27 @@ def do_continue(self, arg): do_c = do_continue + def 
do_down(self, arg): + """d(own) [count] + + Move the current frame count (default one) levels down in the + stack trace (to a newer frame). + """ + try: + count = int(arg or 1) + except ValueError: + self.error("Invalid count") + return False + + self.select_frame(count, offset=True) + self._print_stack_entry(self.get_current_frame()) + return False + + do_d = do_down + + def do_EOF(self, arg): + raise BdbxQuit("quit") + def do_next(self, arg): self.set_action("next") return True @@ -620,7 +654,7 @@ def do_next(self, arg): do_n = do_next def do_quit(self, arg): - raise Exception("quit") + raise BdbxQuit("quit") do_q = do_quit @@ -636,11 +670,29 @@ def do_step(self, arg): do_s = do_step + def do_up(self, arg): + """u(p) [count] + + Move the current frame count (default one) levels up in the + stack trace (to an older frame). + """ + + try: + count = int(arg or 1) + except ValueError: + self.error("Invalid count") + return False + + self.select_frame(-count, offset=True) + self._print_stack_entry(self.get_current_frame()) + return False + + do_u = do_up + def do_where(self, arg): try: for frame in self.get_stack(): - lineno = frame.f_lineno - self._print_stack_entry(frame, lineno) + self._print_stack_entry(frame) except KeyboardInterrupt: pass return False diff --git a/Lib/test/test_pdbx.py b/Lib/test/test_pdbx.py index 57b89a39e31f43..63bea4cb9e713c 100644 --- a/Lib/test/test_pdbx.py +++ b/Lib/test/test_pdbx.py @@ -16,6 +16,7 @@ def __enter__(self): def __exit__(self, *exc): sys.stdin = self.real_stdin + def test_pdbx_basic_commands(): """Test the basic commands of pdb. @@ -32,12 +33,17 @@ def test_pdbx_basic_commands(): >>> with PdbxTestInput([ # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE ... 'step', ... 'step', + ... 'step', + ... 'step', + ... 'step', + ... 'step', + ... 'step', ... 'return', ... 'next', ... 'n', ... 'continue', ... ]): - ... test_function() + ... 
test_function() > (3)test_function() -> for i in range(5): (Pdbx) step @@ -46,6 +52,22 @@ def test_pdbx_basic_commands(): (Pdbx) step > (2)f() -> x = x + 1 + (Pdbx) step + > (3)f() + -> return x + (Pdbx) step + ----return---- + > (3)f() + -> return x + (Pdbx) step + > (5)test_function() + -> pass + (Pdbx) step + > (4)test_function() + -> n = f(i) + (Pdbx) step + > (2)f() + -> x = x + 1 (Pdbx) return > (5)test_function() -> pass @@ -73,17 +95,21 @@ def test_pdbx_basic_breakpoint(): ... a = 3 >>> with PdbxTestInput([ # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE + ... 'break invalid', ... 'break f', ... 'continue', ... 'clear', ... 'return', ... 'break 6', + ... 'break 100', ... 'continue', ... 'continue', ... ]): - ... test_function() + ... test_function() > (3)test_function() -> for i in range(5): + (Pdbx) break invalid + Invalid breakpoint argument: invalid (Pdbx) break f (Pdbx) continue ----call---- @@ -94,12 +120,86 @@ def test_pdbx_basic_breakpoint(): > (5)test_function() -> pass (Pdbx) break 6 + (Pdbx) break 100 + Can't find line 100 in file (Pdbx) continue > (6)test_function() -> a = 3 (Pdbx) continue """ +def test_pdbx_where_command(): + """Test where command + + >>> def g(): + ... import bdbx; bdbx.break_here() + ... pass + + >>> def f(): + ... g(); + + >>> def test_function(): + ... f() + + >>> with PdbxTestInput([ # doctest: +ELLIPSIS + ... 'w', + ... 'where', + ... 'u', + ... 'w', + ... 'd', + ... 'w', + ... 'continue', + ... ]): + ... test_function() + > (3)g() + -> pass + (Pdbx) w + ... + (10)() + -> test_function() + (2)test_function() + -> f() + (2)f() + -> g(); + > (3)g() + -> pass + (Pdbx) where + ... + (10)() + -> test_function() + (2)test_function() + -> f() + (2)f() + -> g(); + > (3)g() + -> pass + (Pdbx) u + > (2)f() + -> g(); + (Pdbx) w + ... + (2)test_function() + -> f() + > (2)f() + -> g(); + (3)g() + -> pass + (Pdbx) d + > (3)g() + -> pass + (Pdbx) w + ... 
+ (10)() + -> test_function() + (2)test_function() + -> f() + (2)f() + -> g(); + > (3)g() + -> pass + (Pdbx) continue + """ + def load_tests(loader, tests, pattern): from test import test_pdbx tests.addTest(doctest.DocTestSuite(test_pdbx)) From fe694b9f334cf75d6c4ca1ab99daf70de475c043 Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Tue, 25 Apr 2023 11:36:22 -0700 Subject: [PATCH 3/7] Add script target --- Lib/bdbx.py | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/Lib/bdbx.py b/Lib/bdbx.py index 1236d3ea3fc7fe..ca54bdf4f82c90 100644 --- a/Lib/bdbx.py +++ b/Lib/bdbx.py @@ -1,5 +1,7 @@ +import argparse import cmd import dis +import io import linecache import os import re @@ -253,6 +255,28 @@ class StopEvent: is_return: bool = False +class _ExecuteTarget: + pass + + +class _ScriptTarget(_ExecuteTarget): + def __init__(self, filename): + self.filename = filename + if not os.path.exists(filename): + raise FileNotFoundError(filename) + with io.open_code(filename) as f: + self.code = compile(f.read(), filename, "exec") + + @property + def namespaces(self): + ns = { + '__name__': '__main__', + '__file__': self.filename, + '__builtins__': __builtins__, + } + return ns, ns + + class Bdbx: """Bdbx is a singleton class that implements the debugger logic""" _instance = None @@ -383,6 +407,21 @@ def monitor_callback(self, frame, breakpoint, event, event_arg): # ======================= Helper functions =============================== # ======================================================================== + def _run_target(self, target:_ExecuteTarget): + """Debug the given code object in __main__""" + import __main__ + main_dict = __main__.__dict__.copy() + globals, locals = target.namespaces + __main__.__dict__.clear() + __main__.__dict__.update(globals) + self.break_here() + try: + exec(target.code, globals, locals) + except BdbxQuit: + pass + __main__.__dict__.clear() + __main__.__dict__.update(main_dict) + def 
_get_stack_from_frame(self, frame): """Get call stack from the latest frame, oldest frame at [0]""" stack = [] @@ -401,6 +440,11 @@ def __init__(self): Bdbx.__init__(self) cmd.Cmd.__init__(self, 'tab', None, None) + # ======================================================================== + # ============================ User APIs ================================= + # ======================================================================== + + # ======================================================================== # ======================= Interface to Bdbx ============================== # ======================================================================== @@ -703,3 +747,27 @@ def do_where(self, arg): def break_here(): pdb = Pdbx() pdb.break_here(sys._getframe().f_back) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-m', nargs='?', default=None) + options, commands = parser.parse_known_args() + + if options.m: + # Run module + pass + elif commands: + # Run scripts + target = _ScriptTarget(commands[0]) + else: + # Show help message + parser.print_help() + + pdbx = Pdbx() + pdbx._run_target(target) + + +if __name__ == '__main__': + import bdbx + bdbx.main() From b21cc29ece7a5e7069ce297929072e78707ec3c7 Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Mon, 1 May 2023 15:30:45 -0700 Subject: [PATCH 4/7] Separate pdbx from bdbx --- Lib/bdbx.py | 398 ++------------------------------------- Lib/pdbx.py | 419 ++++++++++++++++++++++++++++++++++++++++++ Lib/test/test_pdbx.py | 8 +- 3 files changed, 435 insertions(+), 390 deletions(-) create mode 100644 Lib/pdbx.py diff --git a/Lib/bdbx.py b/Lib/bdbx.py index ca54bdf4f82c90..dcda33360785b8 100644 --- a/Lib/bdbx.py +++ b/Lib/bdbx.py @@ -1,13 +1,6 @@ -import argparse -import cmd import dis -import io -import linecache import os -import re -import reprlib import sys -import tokenize from dataclasses import dataclass from types import FrameType, CodeType @@ -112,6 +105,13 @@ def 
start_monitor(self, action: str, frame: FrameType): self._returning = False sys.monitoring.restart_events() + def start_monitor_code(self, code: CodeType): + """starts monitoring when the given code is executed""" + self._clear_monitor() + self._action, self._frame = "step", None + self._add_local_events(code, MEVENT.LINE | MEVENT.CALL | MEVENT.PY_RETURN | MEVENT.PY_YIELD) + sys.monitoring.restart_events() + def add_breakpoint(self, breakpoint: Breakpoint): """adds a breakpoint to the list of breakpoints""" if breakpoint.code is None: @@ -254,29 +254,6 @@ class StopEvent: is_call: bool = False is_return: bool = False - -class _ExecuteTarget: - pass - - -class _ScriptTarget(_ExecuteTarget): - def __init__(self, filename): - self.filename = filename - if not os.path.exists(filename): - raise FileNotFoundError(filename) - with io.open_code(filename) as f: - self.code = compile(f.read(), filename, "exec") - - @property - def namespaces(self): - ns = { - '__name__': '__main__', - '__file__': self.filename, - '__builtins__': __builtins__, - } - return ns, ns - - class Bdbx: """Bdbx is a singleton class that implements the debugger logic""" _instance = None @@ -309,6 +286,11 @@ def break_here(self, frame=None): self.set_action("next", frame) self._monitor_genie.start_monitor(self._next_action, self._next_action_frame) + def break_code(self, code): + """break into the debugger when the given code is executed""" + self.set_action("step", None) + self._monitor_genie.start_monitor_code(code) + def set_action(self, action, frame=None): """Set the next action, if frame is None, use the current frame""" if frame is None: @@ -407,21 +389,6 @@ def monitor_callback(self, frame, breakpoint, event, event_arg): # ======================= Helper functions =============================== # ======================================================================== - def _run_target(self, target:_ExecuteTarget): - """Debug the given code object in __main__""" - import __main__ - main_dict = 
__main__.__dict__.copy() - globals, locals = target.namespaces - __main__.__dict__.clear() - __main__.__dict__.update(globals) - self.break_here() - try: - exec(target.code, globals, locals) - except BdbxQuit: - pass - __main__.__dict__.clear() - __main__.__dict__.update(main_dict) - def _get_stack_from_frame(self, frame): """Get call stack from the latest frame, oldest frame at [0]""" stack = [] @@ -430,344 +397,3 @@ def _get_stack_from_frame(self, frame): frame = frame.f_back stack.reverse() return stack - - -class Pdbx(Bdbx, cmd.Cmd): - def __init__(self): - self._event = None - self.prompt = "(Pdbx) " - self.fncache = {} - Bdbx.__init__(self) - cmd.Cmd.__init__(self, 'tab', None, None) - - # ======================================================================== - # ============================ User APIs ================================= - # ======================================================================== - - - # ======================================================================== - # ======================= Interface to Bdbx ============================== - # ======================================================================== - - def dispatch_event(self, event): - self._event = event - self.print_header() - self.cmdloop() - - # ======================================================================== - # ================= Methods that can be overwritten ====================== - # ======================================================================== - - def error(self, msg): - print(msg) - - def message(self, msg): - print(msg) - - def print_header(self): - if self._event.is_call: - print("----call----") - elif self._event.is_return: - print("----return----") - if self._event.line_number: - lineno = self._event.line_number - else: - lineno = self._event.frame.f_lineno - self._print_stack_entry(self._event.frame, lineno) - - # ======================================================================== - # ======================== helper 
functions ============================== - # ======================================================================== - - @property - def _default_file(self): - """Produce a reasonable default.""" - filename = self.get_current_frame().f_code.co_filename - if filename == '' and self._main_pyfile: - filename = self._main_pyfile - return filename - - def _format_stack_entry(self, frame, lineno, lprefix=': '): - """Return a string with information about a stack entry. - - The stack entry frame_lineno is a (frame, lineno) tuple. The - return string contains the canonical filename, the function name - or '', the input arguments, the return value, and the - line of code (if it exists). - - """ - filename = self._canonic(frame.f_code.co_filename) - s = '%s(%r)' % (filename, lineno) - if frame.f_code.co_name: - s += frame.f_code.co_name - else: - s += "" - s += '()' - if '__return__' in frame.f_locals: - rv = frame.f_locals['__return__'] - s += '->' - s += reprlib.repr(rv) - line = linecache.getline(filename, lineno, frame.f_globals) - if line: - s += lprefix + line.strip() - return s - - def _print_stack_entry(self, frame, line_number=None): - if frame is self.get_current_frame(): - prefix = '> ' - else: - prefix = ' ' - if line_number is None: - line_number = frame.f_lineno - self.message(prefix + - self._format_stack_entry(frame, line_number, '\n-> ')) - - def _canonic(self, filename): - """Return canonical form of filename. - - For real filenames, the canonical form is a case-normalized (on - case insensitive filesystems) absolute path. 'Filenames' with - angle brackets, such as "", generated in interactive - mode, are returned unchanged. 
- """ - if filename == "<" + filename[1:-1] + ">": - return filename - canonic = self.fncache.get(filename) - if not canonic: - canonic = os.path.abspath(filename) - canonic = os.path.normcase(canonic) - self.fncache[filename] = canonic - return canonic - - def _lookupmodule(self, filename): - """Helper function for break/clear parsing -- may be overridden. - - lookupmodule() translates (possibly incomplete) file or module name - into an absolute file name. - """ - if os.path.isabs(filename) and os.path.exists(filename): - return filename - f = os.path.join(sys.path[0], filename) - if os.path.exists(f) and self._canonic(f) == self._main_pyfile: - return f - root, ext = os.path.splitext(filename) - if ext == '': - filename = filename + '.py' - if os.path.isabs(filename): - return filename - for dirname in sys.path: - while os.path.islink(dirname): - dirname = os.readlink(dirname) - fullname = os.path.join(dirname, filename) - if os.path.exists(fullname): - return fullname - return None - - def _confirm_line_executable(self, filename, line_number): - """Check whether specified line seems to be executable. - - Return `lineno` if it is, 0 if not (e.g. a docstring, comment, blank - line or EOF). Warning: testing is not comprehensive. - """ - # this method should be callable before starting debugging, so default - # to "no globals" if there is no current frame - globs = self.get_current_frame().f_globals if self.get_current_frame() else None - line = linecache.getline(filename, line_number, globs) - if not line: - raise ValueError(f"Can't find line {line_number} in file {filename}") - line = line.strip() - # Don't allow setting breakpoint at a blank line - if (not line or (line[0] == '#') or - (line[:3] == '"""') or line[:3] == "'''"): - raise ValueError(f"Can't set breakpoint at line {line_number} in file {filename}, it's blank or a comment") - - def _parse_breakpoint_arg(self, arg): - """Parse a breakpoint argument. 
- - Return a tuple (filename, lineno, function, condition) - from [filename:]lineno | function, condition - """ - filename = None - line_number = None - function = None - condition = None - # parse stuff after comma: condition - comma = arg.find(',') - if comma >= 0: - condition = arg[comma+1:].lstrip() - arg = arg[:comma] - - # parse stuff before comma: [filename:]lineno | function - colon = arg.rfind(':') - if colon >= 0: - filename = arg[:colon].rstrip() - filename = self._lookupmodule(filename) - if filename is None: - raise ValueError(f"Invalid filename: {filename}") - line_number = arg[colon+1:] - try: - line_number = int(line_number) - except ValueError: - raise ValueError(f"Invalid line number: {line_number}") - else: - # no colon; can be lineno or function - try: - # Maybe it's a function? - frame = self.get_current_frame() - function = eval(arg, - frame.f_globals, - frame.f_locals) - if hasattr(function, "__func__"): - # It's a method - function = function.__func__ - code = function.__code__ - filename = code.co_filename - line_number = code.co_firstlineno - except: - # Then it has to be a line number - function = None - try: - line_number = int(arg) - except ValueError: - raise ValueError(f"Invalid breakpoint argument: {arg}") - filename = self._default_file - # Before returning, check that line on the file is executable - self._confirm_line_executable(filename, line_number) - - return filename, line_number, function, condition - - # ====================================================================== - # The following methods are called by the cmd.Cmd base class - # All the commands are in alphabetic order - # ====================================================================== - - def do_break(self, arg): - if not arg: - for bp in self.get_breakpoints(): - print(bp) - return False - try: - filename, line_number, function, condition = self._parse_breakpoint_arg(arg) - except ValueError as exc: - self.error(str(exc)) - return False - - if function: - 
self.set_function_breakpoint(function) - else: - self.set_file_breakpoint(filename, line_number) - return False - - do_b = do_break - - def do_clear(self, arg): - self.clear_breakpoints() - return False - - def do_continue(self, arg): - self.set_action("continue") - return True - - do_c = do_continue - - def do_down(self, arg): - """d(own) [count] - - Move the current frame count (default one) levels down in the - stack trace (to a newer frame). - """ - try: - count = int(arg or 1) - except ValueError: - self.error("Invalid count") - return False - - self.select_frame(count, offset=True) - self._print_stack_entry(self.get_current_frame()) - return False - - do_d = do_down - - def do_EOF(self, arg): - raise BdbxQuit("quit") - - def do_next(self, arg): - self.set_action("next") - return True - - do_n = do_next - - def do_quit(self, arg): - raise BdbxQuit("quit") - - do_q = do_quit - - def do_return(self, arg): - self.set_action("return") - return True - - do_r = do_return - - def do_step(self, arg): - self.set_action("step") - return True - - do_s = do_step - - def do_up(self, arg): - """u(p) [count] - - Move the current frame count (default one) levels up in the - stack trace (to an older frame). 
- """ - - try: - count = int(arg or 1) - except ValueError: - self.error("Invalid count") - return False - - self.select_frame(-count, offset=True) - self._print_stack_entry(self.get_current_frame()) - return False - - do_u = do_up - - def do_where(self, arg): - try: - for frame in self.get_stack(): - self._print_stack_entry(frame) - except KeyboardInterrupt: - pass - return False - - do_w = do_where - - -def break_here(): - pdb = Pdbx() - pdb.break_here(sys._getframe().f_back) - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('-m', nargs='?', default=None) - options, commands = parser.parse_known_args() - - if options.m: - # Run module - pass - elif commands: - # Run scripts - target = _ScriptTarget(commands[0]) - else: - # Show help message - parser.print_help() - - pdbx = Pdbx() - pdbx._run_target(target) - - -if __name__ == '__main__': - import bdbx - bdbx.main() diff --git a/Lib/pdbx.py b/Lib/pdbx.py new file mode 100644 index 00000000000000..d935f76387fbe6 --- /dev/null +++ b/Lib/pdbx.py @@ -0,0 +1,419 @@ +import abc +import argparse +import cmd +import io +import linecache +import os +import reprlib +import runpy +import sys + +from bdbx import Bdbx, BdbxQuit +from types import CodeType + + +class _ExecuteTarget(abc.ABC): + code: CodeType + + @abc.abstractmethod + def __init__(self, filename: str): + pass + + @property + @abc.abstractmethod + def namespaces(self) -> tuple[dict, dict]: + pass + + +class _ScriptTarget(_ExecuteTarget): + def __init__(self, filename): + self.filename = filename + if not os.path.exists(filename): + raise FileNotFoundError(filename) + with io.open_code(filename) as f: + self.code = compile(f.read(), filename, "exec") + + @property + def namespaces(self): + ns = { + '__name__': '__main__', + '__file__': self.filename, + '__builtins__': __builtins__, + } + return ns, ns + + +class _ModuleTarget(_ExecuteTarget): + def __init__(self, filename): + # Just raise the normal exception if module is not found + _, 
self.spec, self.code = runpy._get_module_details(filename) + self.filename = os.path.normcase(os.path.abspath(self.code.co_filename)) + + @property + def namespaces(self): + ns = { + '__name__': '__main__', + '__file__': self.filename, + '__package__': self.spec.parent, + '__loader__': self.spec.loader, + '__spec__': self.spec, + '__builtins___': __builtins__, + } + return ns, ns + +class Pdbx(Bdbx, cmd.Cmd): + def __init__(self): + self._event = None + self.prompt = "(Pdbx) " + self.fncache = {} + Bdbx.__init__(self) + cmd.Cmd.__init__(self, 'tab', None, None) + + # ======================================================================== + # ============================ User APIs ================================= + # ======================================================================== + + + # ======================================================================== + # ======================= Interface to Bdbx ============================== + # ======================================================================== + + def dispatch_event(self, event): + self._event = event + self.print_header() + self.cmdloop() + + # ======================================================================== + # ================= Methods that can be overwritten ====================== + # ======================================================================== + + def error(self, msg): + print(msg) + + def message(self, msg): + print(msg) + + def print_header(self): + if self._event.is_call: + print("----call----") + elif self._event.is_return: + print("----return----") + if self._event.line_number: + lineno = self._event.line_number + else: + lineno = self._event.frame.f_lineno + self._print_stack_entry(self._event.frame, lineno) + + # ======================================================================== + # ======================== helper functions ============================== + # ======================================================================== + + def 
_run_target(self, target:_ExecuteTarget): + """Debug the given code object in __main__""" + import __main__ + main_dict = __main__.__dict__.copy() + globals, locals = target.namespaces + __main__.__dict__.clear() + __main__.__dict__.update(globals) + self.break_code(target.code) + try: + exec(target.code, globals, locals) + except BdbxQuit: + pass + __main__.__dict__.clear() + __main__.__dict__.update(main_dict) + + @property + def _default_file(self): + """Produce a reasonable default.""" + filename = self.get_current_frame().f_code.co_filename + if filename == '' and self._main_pyfile: + filename = self._main_pyfile + return filename + + def _format_stack_entry(self, frame, lineno, lprefix=': '): + """Return a string with information about a stack entry. + + The stack entry frame_lineno is a (frame, lineno) tuple. The + return string contains the canonical filename, the function name + or '', the input arguments, the return value, and the + line of code (if it exists). + + """ + filename = self._canonic(frame.f_code.co_filename) + s = '%s(%r)' % (filename, lineno) + if frame.f_code.co_name: + s += frame.f_code.co_name + else: + s += "" + s += '()' + if '__return__' in frame.f_locals: + rv = frame.f_locals['__return__'] + s += '->' + s += reprlib.repr(rv) + line = linecache.getline(filename, lineno, frame.f_globals) + if line: + s += lprefix + line.strip() + return s + + def _print_stack_entry(self, frame, line_number=None): + if frame is self.get_current_frame(): + prefix = '> ' + else: + prefix = ' ' + if line_number is None: + line_number = frame.f_lineno + self.message(prefix + + self._format_stack_entry(frame, line_number, '\n-> ')) + + def _canonic(self, filename): + """Return canonical form of filename. + + For real filenames, the canonical form is a case-normalized (on + case insensitive filesystems) absolute path. 'Filenames' with + angle brackets, such as "", generated in interactive + mode, are returned unchanged. 
+ """ + if filename == "<" + filename[1:-1] + ">": + return filename + canonic = self.fncache.get(filename) + if not canonic: + canonic = os.path.abspath(filename) + canonic = os.path.normcase(canonic) + self.fncache[filename] = canonic + return canonic + + def _lookupmodule(self, filename): + """Helper function for break/clear parsing -- may be overridden. + + lookupmodule() translates (possibly incomplete) file or module name + into an absolute file name. + """ + if os.path.isabs(filename) and os.path.exists(filename): + return filename + f = os.path.join(sys.path[0], filename) + if os.path.exists(f) and self._canonic(f) == self._main_pyfile: + return f + root, ext = os.path.splitext(filename) + if ext == '': + filename = filename + '.py' + if os.path.isabs(filename): + return filename + for dirname in sys.path: + while os.path.islink(dirname): + dirname = os.readlink(dirname) + fullname = os.path.join(dirname, filename) + if os.path.exists(fullname): + return fullname + return None + + def _confirm_line_executable(self, filename, line_number): + """Check whether specified line seems to be executable. + + Return `lineno` if it is, 0 if not (e.g. a docstring, comment, blank + line or EOF). Warning: testing is not comprehensive. + """ + # this method should be callable before starting debugging, so default + # to "no globals" if there is no current frame + globs = self.get_current_frame().f_globals if self.get_current_frame() else None + line = linecache.getline(filename, line_number, globs) + if not line: + raise ValueError(f"Can't find line {line_number} in file {filename}") + line = line.strip() + # Don't allow setting breakpoint at a blank line + if (not line or (line[0] == '#') or + (line[:3] == '"""') or line[:3] == "'''"): + raise ValueError(f"Can't set breakpoint at line {line_number} in file {filename}, it's blank or a comment") + + def _parse_breakpoint_arg(self, arg): + """Parse a breakpoint argument. 
+ + Return a tuple (filename, lineno, function, condition) + from [filename:]lineno | function, condition + """ + filename = None + line_number = None + function = None + condition = None + # parse stuff after comma: condition + comma = arg.find(',') + if comma >= 0: + condition = arg[comma+1:].lstrip() + arg = arg[:comma] + + # parse stuff before comma: [filename:]lineno | function + colon = arg.rfind(':') + if colon >= 0: + filename = arg[:colon].rstrip() + filename = self._lookupmodule(filename) + if filename is None: + raise ValueError(f"Invalid filename: {filename}") + line_number = arg[colon+1:] + try: + line_number = int(line_number) + except ValueError: + raise ValueError(f"Invalid line number: {line_number}") + else: + # no colon; can be lineno or function + try: + # Maybe it's a function? + frame = self.get_current_frame() + function = eval(arg, + frame.f_globals, + frame.f_locals) + if hasattr(function, "__func__"): + # It's a method + function = function.__func__ + code = function.__code__ + filename = code.co_filename + line_number = code.co_firstlineno + except: + # Then it has to be a line number + function = None + try: + line_number = int(arg) + except ValueError: + raise ValueError(f"Invalid breakpoint argument: {arg}") + filename = self._default_file + # Before returning, check that line on the file is executable + self._confirm_line_executable(filename, line_number) + + return filename, line_number, function, condition + + # ====================================================================== + # The following methods are called by the cmd.Cmd base class + # All the commands are in alphabetic order + # ====================================================================== + + def do_break(self, arg): + if not arg: + for bp in self.get_breakpoints(): + print(bp) + return False + try: + filename, line_number, function, condition = self._parse_breakpoint_arg(arg) + except ValueError as exc: + self.error(str(exc)) + return False + + if function: + 
self.set_function_breakpoint(function) + else: + self.set_file_breakpoint(filename, line_number) + return False + + do_b = do_break + + def do_clear(self, arg): + self.clear_breakpoints() + return False + + def do_continue(self, arg): + self.set_action("continue") + return True + + do_c = do_continue + + def do_down(self, arg): + """d(own) [count] + + Move the current frame count (default one) levels down in the + stack trace (to a newer frame). + """ + try: + count = int(arg or 1) + except ValueError: + self.error("Invalid count") + return False + + self.select_frame(count, offset=True) + self._print_stack_entry(self.get_current_frame()) + return False + + do_d = do_down + + def do_EOF(self, arg): + raise BdbxQuit("quit") + + def do_next(self, arg): + self.set_action("next") + return True + + do_n = do_next + + def do_quit(self, arg): + raise BdbxQuit("quit") + + do_q = do_quit + + def do_return(self, arg): + self.set_action("return") + return True + + do_r = do_return + + def do_step(self, arg): + self.set_action("step") + return True + + do_s = do_step + + def do_up(self, arg): + """u(p) [count] + + Move the current frame count (default one) levels up in the + stack trace (to an older frame). 
+ """ + + try: + count = int(arg or 1) + except ValueError: + self.error("Invalid count") + return False + + self.select_frame(-count, offset=True) + self._print_stack_entry(self.get_current_frame()) + return False + + do_u = do_up + + def do_where(self, arg): + try: + for frame in self.get_stack(): + self._print_stack_entry(frame) + except KeyboardInterrupt: + pass + return False + + do_w = do_where + + +def break_here(): + pdb = Pdbx() + pdb.break_here(sys._getframe().f_back) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-m', nargs='?', default=None) + options, commands = parser.parse_known_args() + + if options.m: + # Run module + target = _ModuleTarget(options.m) + sys.argv[:] = [target.filename] + commands + elif commands: + # Run script + target = _ScriptTarget(commands[0]) + sys.argv[:] = commands + else: + # Show help message + parser.print_help() + + pdbx = Pdbx() + pdbx._run_target(target) + + +if __name__ == '__main__': + import pdbx + pdbx.main() diff --git a/Lib/test/test_pdbx.py b/Lib/test/test_pdbx.py index 63bea4cb9e713c..e815e4381adbee 100644 --- a/Lib/test/test_pdbx.py +++ b/Lib/test/test_pdbx.py @@ -1,4 +1,4 @@ -from bdbx import Pdbx +from pdbx import Pdbx import doctest import sys from test.test_doctest import _FakeInput @@ -25,7 +25,7 @@ def test_pdbx_basic_commands(): ... return x >>> def test_function(): - ... import bdbx; bdbx.break_here() + ... import pdbx; pdbx.break_here() ... for i in range(5): ... n = f(i) ... pass @@ -88,7 +88,7 @@ def test_pdbx_basic_breakpoint(): ... return x >>> def test_function(): - ... import bdbx; bdbx.break_here() + ... import pdbx; pdbx.break_here() ... for i in range(5): ... n = f(i) ... pass @@ -132,7 +132,7 @@ def test_pdbx_where_command(): """Test where command >>> def g(): - ... import bdbx; bdbx.break_here() + ... import pdbx; pdbx.break_here() ... 
pass >>> def f(): From 3503b68683a5a98c32e65bd35108ae8201f8235e Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Sat, 13 May 2023 13:27:26 +0800 Subject: [PATCH 5/7] Improve stack entry print format --- Lib/pdbx.py | 57 ++++++++++++++--------- Lib/test/test_pdbx.py | 104 ++++++++++++++++++++++-------------------- 2 files changed, 90 insertions(+), 71 deletions(-) diff --git a/Lib/pdbx.py b/Lib/pdbx.py index d935f76387fbe6..2a1eadd6b85d5b 100644 --- a/Lib/pdbx.py +++ b/Lib/pdbx.py @@ -131,7 +131,7 @@ def _default_file(self): filename = self._main_pyfile return filename - def _format_stack_entry(self, frame, lineno, lprefix=': '): + def _format_stack_entry(self, frame, lineno, stack_prefix="> ", code_prefix="-> "): """Return a string with information about a stack entry. The stack entry frame_lineno is a (frame, lineno) tuple. The @@ -143,28 +143,35 @@ def _format_stack_entry(self, frame, lineno, lprefix=': '): filename = self._canonic(frame.f_code.co_filename) s = '%s(%r)' % (filename, lineno) if frame.f_code.co_name: - s += frame.f_code.co_name + func_name = frame.f_code.co_name else: - s += "" - s += '()' - if '__return__' in frame.f_locals: - rv = frame.f_locals['__return__'] - s += '->' - s += reprlib.repr(rv) - line = linecache.getline(filename, lineno, frame.f_globals) - if line: - s += lprefix + line.strip() - return s - - def _print_stack_entry(self, frame, line_number=None): - if frame is self.get_current_frame(): - prefix = '> ' + func_name = "" + + code = linecache.getline(filename, lineno, frame.f_globals) + if code: + code = f"\n{code_prefix}{code.strip()}" else: - prefix = ' ' + code = "" + return f"{stack_prefix}{func_name}() @ {filename}:{lineno}{code}" + + def _print_stack_entry(self, + frame, + line_number=None, + stack_prefix=None, + code_prefix=None): if line_number is None: line_number = frame.f_lineno - self.message(prefix + - self._format_stack_entry(frame, line_number, '\n-> ')) + if stack_prefix is None: + if frame is self.get_current_frame(): 
+ stack_prefix = '> ' + else: + stack_prefix = ' ' + if code_prefix is None: + code_prefix = '-> ' + self.message(self._format_stack_entry(frame, + line_number, + stack_prefix=stack_prefix, + code_prefix=code_prefix)) def _canonic(self, filename): """Return canonical form of filename. @@ -379,8 +386,16 @@ def do_up(self, arg): def do_where(self, arg): try: - for frame in self.get_stack(): - self._print_stack_entry(frame) + stack = self.get_stack() + prefix_size = len(str(len(stack))) + for idx, frame in enumerate(stack): + if frame is self.get_current_frame(): + tag = '>' + else: + tag = '#' + self._print_stack_entry(frame, + stack_prefix=f"{tag}{idx: <{prefix_size}} ", + code_prefix=f"{'': >{prefix_size}} -> ") except KeyboardInterrupt: pass return False diff --git a/Lib/test/test_pdbx.py b/Lib/test/test_pdbx.py index e815e4381adbee..3cc5e64009daf6 100644 --- a/Lib/test/test_pdbx.py +++ b/Lib/test/test_pdbx.py @@ -38,45 +38,49 @@ def test_pdbx_basic_commands(): ... 'step', ... 'step', ... 'step', + ... 'step', ... 'return', ... 'next', ... 'n', ... 'continue', ... ]): ... test_function() - > (3)test_function() + > test_function() @ :3 -> for i in range(5): (Pdbx) step - > (4)test_function() + > test_function() @ :4 -> n = f(i) (Pdbx) step - > (2)f() + > f() @ :2 -> x = x + 1 (Pdbx) step - > (3)f() + > f() @ :3 -> return x (Pdbx) step ----return---- - > (3)f() + > f() @ :3 -> return x (Pdbx) step - > (5)test_function() + > test_function() @ :5 -> pass (Pdbx) step - > (4)test_function() + > test_function() @ :3 + -> for i in range(5): + (Pdbx) step + > test_function() @ :4 -> n = f(i) (Pdbx) step - > (2)f() + > f() @ :2 -> x = x + 1 (Pdbx) return - > (5)test_function() + > test_function() @ :5 -> pass (Pdbx) next - > (4)test_function() - -> n = f(i) + > test_function() @ :3 + -> for i in range(5): (Pdbx) n - > (5)test_function() - -> pass + > test_function() @ :4 + -> n = f(i) (Pdbx) continue """ @@ -106,24 +110,24 @@ def test_pdbx_basic_breakpoint(): ... 
'continue', ... ]): ... test_function() - > (3)test_function() + > test_function() @ :3 -> for i in range(5): (Pdbx) break invalid Invalid breakpoint argument: invalid (Pdbx) break f (Pdbx) continue ----call---- - > (1)f() + > f() @ :1 -> def f(x): (Pdbx) clear (Pdbx) return - > (5)test_function() + > test_function() @ :5 -> pass (Pdbx) break 6 (Pdbx) break 100 Can't find line 100 in file (Pdbx) continue - > (6)test_function() + > test_function() @ :6 -> a = 3 (Pdbx) continue """ @@ -151,52 +155,52 @@ def test_pdbx_where_command(): ... 'continue', ... ]): ... test_function() - > (3)g() + > g() @ :3 -> pass (Pdbx) w ... - (10)() - -> test_function() - (2)test_function() - -> f() - (2)f() - -> g(); - > (3)g() - -> pass + #18 () @ :10 + -> test_function() + #19 test_function() @ :2 + -> f() + #20 f() @ :2 + -> g(); + >21 g() @ :3 + -> pass (Pdbx) where ... - (10)() - -> test_function() - (2)test_function() - -> f() - (2)f() - -> g(); - > (3)g() - -> pass + #18 () @ :10 + -> test_function() + #19 test_function() @ :2 + -> f() + #20 f() @ :2 + -> g(); + >21 g() @ :3 + -> pass (Pdbx) u - > (2)f() + > f() @ :2 -> g(); (Pdbx) w ... - (2)test_function() - -> f() - > (2)f() - -> g(); - (3)g() - -> pass + #19 test_function() @ :2 + -> f() + >20 f() @ :2 + -> g(); + #21 g() @ :3 + -> pass (Pdbx) d - > (3)g() + > g() @ :3 -> pass (Pdbx) w ... 
- (10)() - -> test_function() - (2)test_function() - -> f() - (2)f() - -> g(); - > (3)g() - -> pass + #18 () @ :10 + -> test_function() + #19 test_function() @ :2 + -> f() + #20 f() @ :2 + -> g(); + >21 g() @ :3 + -> pass (Pdbx) continue """ From 567cf75e186ae827129914f8d5ec35ddb3bb0022 Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Sat, 13 May 2023 16:00:33 +0800 Subject: [PATCH 6/7] Enable monitor to handle multiple debuggers --- Lib/bdbx.py | 330 ++++++++++++++++++++++++++++++++++------------------ Lib/pdbx.py | 1 - 2 files changed, 215 insertions(+), 116 deletions(-) diff --git a/Lib/bdbx.py b/Lib/bdbx.py index dcda33360785b8..2c86aee9e077ee 100644 --- a/Lib/bdbx.py +++ b/Lib/bdbx.py @@ -4,7 +4,6 @@ from dataclasses import dataclass from types import FrameType, CodeType -from typing import Callable MEVENT = sys.monitoring.events @@ -45,45 +44,44 @@ def _get_next_id(cls): return cls._next_id - 1 -class MonitorGenie: - """ - MonitorGenie is a layer to handle PEP-669 events aka sys.monitoring. - - It saves the trouble for the debugger to handle the monitoring events. - MonitorGenie takes file and function breakpoints, and an action to start - the monitoring. 
The accepted actions are: - "step" - "next" - "return" - "continue" - """ +class _DebuggerMonitor: def __init__( self, - tool_id: int, - debugger_entry: Callable[[FrameType, Breakpoint | None, int, dict | None], None] + debugger, ): self._action = None self._frame = None - self._tool_id = tool_id + self._debugger = debugger self._returning = False - self._tasks = [] self._bound_breakpoints: dict[CodeType, list[Breakpoint]] = {} self._free_breakpoints: list[Breakpoint] = [] - self._debugger_entry = debugger_entry - self._code_with_events = set() - sys.monitoring.use_tool_id(tool_id, "MonitorGenie") - self._register_callbacks() + self._code_with_events: dict[CodeType, int] = {} - # ======================================================================== - # ============================= Public API =============================== - # ======================================================================== + @property + def has_breakpoint(self): + return self._free_breakpoints or self._bound_breakpoints - def start_monitor(self, action: str, frame: FrameType): - """starts monitoring with the given action and frame""" - self._clear_monitor() - self._try_bind_breakpoints(frame.f_code) - self._set_events_for_breakpoints() - self._action, self._frame = action, frame + def get_global_events(self): + if self._free_breakpoints: + return MEVENT.CALL + else: + return MEVENT.NO_EVENTS + + def get_local_events(self): + for code, bp_list in self._bound_breakpoints.items(): + for breakpoint in bp_list: + if breakpoint.line_number is not None: + self._add_local_events(code, MEVENT.LINE) + else: + self._add_local_events(code, MEVENT.PY_START) + return self._code_with_events + + def clear_events(self): + self._code_with_events = {} + + def set_action(self, action: str, frame: FrameType): + self._action = action + self._frame = frame if action == "step": if not self._returning: self._add_local_events(frame.f_code, MEVENT.LINE | MEVENT.CALL | MEVENT.PY_RETURN \ @@ -101,16 +99,24 @@ def 
start_monitor(self, action: str, frame: FrameType): self._add_local_events(frame.f_back.f_code, MEVENT.LINE | MEVENT.PY_RETURN | MEVENT.PY_YIELD) elif action == "continue": pass - self._returning = False - sys.monitoring.restart_events() - def start_monitor_code(self, code: CodeType): - """starts monitoring when the given code is executed""" - self._clear_monitor() - self._action, self._frame = "step", None + def set_code(self, code: CodeType): + self._action = "step" + self._frame = None self._add_local_events(code, MEVENT.LINE | MEVENT.CALL | MEVENT.PY_RETURN | MEVENT.PY_YIELD) - sys.monitoring.restart_events() + + def try_bind_breakpoints(self, code): + """try to bind free breakpoint, return whether any breakpoint is bound""" + bp_dirty = False + for bp in self._free_breakpoints[:]: + if bp.belong_to(code): + self.remove_breakpoint(bp) + bp.code = code + self.add_breakpoint(bp) + bp_dirty = True + break + return bp_dirty def add_breakpoint(self, breakpoint: Breakpoint): """adds a breakpoint to the list of breakpoints""" @@ -127,49 +133,50 @@ def remove_breakpoint(self, breakpoint: Breakpoint): self._free_breakpoints.remove(breakpoint) else: self._bound_breakpoints[breakpoint.code].remove(breakpoint) + if not self._bound_breakpoints[breakpoint.code]: + del self._bound_breakpoints[breakpoint.code] - # ======================================================================== - # ============================ Private API =============================== - # ======================================================================== - - def _clear_monitor(self): - sys.monitoring.set_events(self._tool_id, 0) - for code in self._code_with_events: - sys.monitoring.set_local_events(self._tool_id, code, 0) + def line_callback(self, frame, code, line_number): + if bp := self._breakhere(code, line_number): + self._debugger.monitor_callback(frame, bp, MEVENT.LINE, + {"code": code, "line_number": line_number}) + elif self._stophere(code): + self._debugger.monitor_callback(frame,
None, MEVENT.LINE, + {"code": code, "line_number": line_number}) + else: + return False + return True - def _add_global_events(self, events): - curr_events = sys.monitoring.get_events(self._tool_id) - sys.monitoring.set_events(self._tool_id, curr_events | events) + def call_callback(self, code): + # The only possible trigger for this is the "step" action + # If the callable is instrumentable, do it, otherwise ignore it + if self._action == "step": + self._add_local_events(code, MEVENT.LINE) + return True + return False - def _add_local_events(self, code, events): - curr_events = sys.monitoring.get_local_events(self._tool_id, code) - self._code_with_events.add(code) - sys.monitoring.set_local_events(self._tool_id, code, curr_events | events) + def start_callback(self, frame, code, instruction_offset): + if bp := self._breakhere(code, None): + self._debugger.monitor_callback(frame, bp, MEVENT.PY_START, + {"code": code, "instruction_offset": instruction_offset}) + elif self._stophere(code): + self._debugger.monitor_callback(frame, None, MEVENT.PY_START, + {"code": code, "instruction_offset": instruction_offset}) + else: + return False + return True - def _set_events_for_breakpoints(self): - if self._free_breakpoints: - self._add_global_events(MEVENT.PY_START) - for code, bp_list in self._bound_breakpoints.items(): - for breakpoint in bp_list: - if breakpoint.line_number is not None: - self._add_local_events(code, MEVENT.LINE) - else: - self._add_local_events(code, MEVENT.PY_START) + def return_callback(self, frame, code, instruction_offset, retval): + if self._stophere(code): + self._returning = True + self._debugger.monitor_callback(frame, None, MEVENT.PY_RETURN, + {"code": code, "instruction_offset": instruction_offset, "retval": retval}) + else: + return False + return True - def _try_bind_breakpoints(self, code): - # copy the breakpoints so we can remove bp from it - bp_dirty = False - for bp in self._free_breakpoints[:]: - if bp.belong_to(code): -
self.remove_breakpoint(bp) - bp.code = code - self.add_breakpoint(bp) - bp_dirty = True - break - if bp_dirty: - self._set_events_for_breakpoints() - if not self._free_breakpoints: - sys.monitoring.set_events(self._tool_id, 0) + def _add_local_events(self, code, events): + self._code_with_events[code] = self._code_with_events.get(code, 0) | events def _stophere(self, code): if self._action == "step": @@ -191,6 +198,111 @@ def _breakhere(self, code, line_number): return bp return None + +class MonitorGenie: + """ + MonitorGenie is a layer to handle PEP-669 events aka sys.monitoring. + + It saves the trouble for the debugger to handle the monitoring events. + MonitorGenie takes file and function breakpoints, and an action to start + the monitoring. The accepted actions are: + "step" + "next" + "return" + "continue" + """ + def __init__( + self, + tool_id: int, + ): + self._action = None + self._frame = None + self._tool_id = tool_id + self._tasks = [] + sys.monitoring.use_tool_id(tool_id, "MonitorGenie") + self._register_callbacks() + self._debugger_monitors: dict[Bdbx, _DebuggerMonitor] = {} + self._code_with_events = set() + + # ======================================================================== + # ============================= Public API =============================== + # ======================================================================== + + def register_debugger(self, debugger): + if debugger not in self._debugger_monitors: + self._debugger_monitors[debugger] = _DebuggerMonitor(debugger) + + def unregister_debugger(self, debugger): + if debugger in self._debugger_monitors: + self._debugger_monitors.pop(debugger) + + def start_monitor(self, debugger, action: str, frame: FrameType): + """starts monitoring with the given action and frame""" + dbg_monitor = self._get_monitor(debugger) + + if action == "continue" and not dbg_monitor.has_breakpoint: + self.unregister_debugger(debugger) + else: + dbg_monitor.clear_events() + 
dbg_monitor.try_bind_breakpoints(frame.f_code) + dbg_monitor.set_action(action, frame) + + self._set_events() + sys.monitoring.restart_events() + + def start_monitor_code(self, debugger, code: CodeType): + """starts monitoring when the given code is executed""" + dbg_monitor = self._get_monitor(debugger) + dbg_monitor.clear_events() + dbg_monitor.set_code(code) + self._set_events() + sys.monitoring.restart_events() + + def add_breakpoint(self, debugger, breakpoint: Breakpoint): + self._get_monitor(debugger).add_breakpoint(breakpoint) + + def remove_breakpoint(self, debugger, breakpoint: Breakpoint): + self._get_monitor(debugger).remove_breakpoint(breakpoint) + + # ======================================================================== + # ============================ Private API =============================== + # ======================================================================== + + def _get_monitor(self, debugger): + if debugger not in self._debugger_monitors: + self.register_debugger(debugger) + return self._debugger_monitors[debugger] + + def _get_monitors(self): + return list(self._debugger_monitors.values()) + + def _set_events(self): + """ + Go through all the registered debuggers and figure out all the events + that need to be set + """ + self._clear_monitor() + global_events = MEVENT.NO_EVENTS + for dbg_monitor in self._get_monitors(): + global_events |= dbg_monitor.get_global_events() + for code, events in dbg_monitor.get_local_events().items(): + self._add_local_events(code, events) + + def _clear_monitor(self): + sys.monitoring.set_events(self._tool_id, 0) + for code in self._code_with_events: + sys.monitoring.set_local_events(self._tool_id, code, 0) + self._code_with_events = set() + + def _add_global_events(self, events): + curr_events = sys.monitoring.get_events(self._tool_id) + sys.monitoring.set_events(self._tool_id, curr_events | events) + + def _add_local_events(self, code, events): + curr_events = 
sys.monitoring.get_local_events(self._tool_id, code) + self._code_with_events.add(code) + sys.monitoring.set_local_events(self._tool_id, code, curr_events | events) + # Callbacks for the real sys.monitoring def _register_callbacks(self): @@ -201,13 +313,11 @@ def _register_callbacks(self): sys.monitoring.register_callback(self._tool_id, MEVENT.PY_RETURN, self._return_callback) def _line_callback(self, code, line_number): - if bp := self._breakhere(code, line_number): - self._start_debugger(sys._getframe().f_back, bp, MEVENT.LINE, - {"code": code, "line_number": line_number}) - elif self._stophere(code): - self._start_debugger(sys._getframe().f_back, None, MEVENT.LINE, - {"code": code, "line_number": line_number}) - else: + frame = sys._getframe(1) + triggered_callback = False + for dbg_monitor in self._get_monitors(): + triggered_callback |= dbg_monitor.line_callback(frame, code, line_number) + if not triggered_callback: return sys.monitoring.DISABLE def _call_callback(self, code, instruction_offset, callable, arg0): @@ -222,30 +332,30 @@ def _call_callback(self, code, instruction_offset, callable, arg0): except AttributeError: pass if code is not None: - self._add_local_events(code, MEVENT.LINE) + for dbg_monitor in self._get_monitors(): + if dbg_monitor.call_callback(code): + self._set_events() def _start_callback(self, code, instruction_offset): - self._try_bind_breakpoints(code) - if bp := self._breakhere(code, None): - self._start_debugger(sys._getframe().f_back, bp, MEVENT.PY_START, - {"code": code, "instruction_offset": instruction_offset}) - elif self._stophere(code): - self._start_debugger(sys._getframe().f_back, None, MEVENT.PY_START, - {"code": code, "instruction_offset": instruction_offset}) - else: + frame = sys._getframe(1) + triggered_callback = False + for dbg_monitor in self._get_monitors(): + if dbg_monitor.try_bind_breakpoints(code): + self._set_events() + triggered_callback |= dbg_monitor.start_callback(frame, code, instruction_offset) + if not 
triggered_callback: return sys.monitoring.DISABLE def _return_callback(self, code, instruction_offset, retval): - if self._stophere(code): - self._returning = True - self._start_debugger(sys._getframe().f_back, None, MEVENT.PY_RETURN, - {"code": code, "instruction_offset": instruction_offset, "retval": retval}) - else: + frame = sys._getframe(1) + triggered_callback = False + for dbg_monitor in self._get_monitors(): + triggered_callback |= dbg_monitor.return_callback(frame, code, instruction_offset, retval) + if not triggered_callback: return sys.monitoring.DISABLE - def _start_debugger(self, frame, breakpoint, event, args): - self._debugger_entry(frame, breakpoint, event, args) +_monitor_genie = MonitorGenie(sys.monitoring.DEBUGGER_ID) @dataclass class StopEvent: @@ -255,17 +365,6 @@ class StopEvent: is_return: bool = False class Bdbx: - """Bdbx is a singleton class that implements the debugger logic""" - _instance = None - - def __new__(cls): - if Bdbx._instance is None: - instance = super().__new__(cls) - instance._tool_id = sys.monitoring.DEBUGGER_ID - instance._monitor_genie = MonitorGenie(instance._tool_id, instance.monitor_callback) - Bdbx._instance = instance - return Bdbx._instance - def __init__(self): self._next_action = None self._next_action_frame = None @@ -274,6 +373,7 @@ def __init__(self): self._curr_frame = None self._main_pyfile = '' self.clear_breakpoints() + self._monitor_genie = _monitor_genie # ======================================================================== # ============================= Public API =============================== @@ -284,12 +384,12 @@ def break_here(self, frame=None): if frame is None: frame = sys._getframe().f_back self.set_action("next", frame) - self._monitor_genie.start_monitor(self._next_action, self._next_action_frame) + self._monitor_genie.start_monitor(self, self._next_action, self._next_action_frame) def break_code(self, code): """break into the debugger when the given code is executed""" 
self.set_action("step", None) - self._monitor_genie.start_monitor_code(code) + self._monitor_genie.start_monitor_code(self, code) def set_action(self, action, frame=None): """Set the next action, if frame is None, use the current frame""" @@ -308,7 +408,7 @@ def set_function_breakpoint(self, func): # Setting line_number to None for function breakpoints bp = Breakpoint(abspath, None, func.__code__) self._breakpoints.append(bp) - self._monitor_genie.add_breakpoint(bp) + self._monitor_genie.add_breakpoint(self, bp) def set_file_breakpoint(self, filename, line_number): """Set a breakpoint at the given line number in the given file @@ -318,12 +418,12 @@ def set_file_breakpoint(self, filename, line_number): """ bp = Breakpoint(filename, line_number, None) self._breakpoints.append(bp) - self._monitor_genie.add_breakpoint(bp) + self._monitor_genie.add_breakpoint(self, bp) def clear_breakpoints(self): if hasattr(self, "_breakpoints"): for bp in self._breakpoints: - self._monitor_genie.remove_breakpoint(bp) + self._monitor_genie.remove_breakpoint(self, bp) self._breakpoints = [] def select_frame(self, index, offset=False): @@ -383,7 +483,7 @@ def monitor_callback(self, frame, breakpoint, event, event_arg): self.dispatch_event(self._stop_event) # After the dispatch returns, reset the monitor - self._monitor_genie.start_monitor(self._next_action, self._next_action_frame) + self._monitor_genie.start_monitor(self, self._next_action, self._next_action_frame) # ======================================================================== # ======================= Helper functions =============================== diff --git a/Lib/pdbx.py b/Lib/pdbx.py index 2a1eadd6b85d5b..6fa8f1cee8ef96 100644 --- a/Lib/pdbx.py +++ b/Lib/pdbx.py @@ -4,7 +4,6 @@ import io import linecache import os -import reprlib import runpy import sys From c97f7284a308f609d7f5164722f89e648014c8ba Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Sat, 13 May 2023 16:53:24 +0800 Subject: [PATCH 7/7] Add p/pp and command 
line tests --- Lib/pdbx.py | 67 ++++++++++++++-- Lib/test/test_pdbx.py | 173 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 233 insertions(+), 7 deletions(-) diff --git a/Lib/pdbx.py b/Lib/pdbx.py index 6fa8f1cee8ef96..d5c02a152cb183 100644 --- a/Lib/pdbx.py +++ b/Lib/pdbx.py @@ -4,8 +4,10 @@ import io import linecache import os +import pprint import runpy import sys +import traceback from bdbx import Bdbx, BdbxQuit from types import CodeType @@ -115,10 +117,7 @@ def _run_target(self, target:_ExecuteTarget): __main__.__dict__.clear() __main__.__dict__.update(globals) self.break_code(target.code) - try: - exec(target.code, globals, locals) - except BdbxQuit: - pass + exec(target.code, globals, locals) __main__.__dict__.clear() __main__.__dict__.update(main_dict) @@ -130,6 +129,7 @@ def _default_file(self): filename = self._main_pyfile return filename + # ======================= Formatting Helpers ============================ def _format_stack_entry(self, frame, lineno, stack_prefix="> ", code_prefix="-> "): """Return a string with information about a stack entry. 
@@ -153,6 +153,9 @@ def _format_stack_entry(self, frame, lineno, stack_prefix="> ", code_prefix="-> code = "" return f"{stack_prefix}{func_name}() @ {filename}:{lineno}{code}" + def _format_exception(self, exc): + return traceback.format_exception_only(exc)[-1].strip() + def _print_stack_entry(self, frame, line_number=None, @@ -286,6 +289,14 @@ def _parse_breakpoint_arg(self, arg): return filename, line_number, function, condition + def _getval(self, arg): + try: + frame = self.get_current_frame() + return eval(arg, frame.f_globals, frame.f_locals) + except: + self.error(self._format_exception(sys.exception())) + raise + # ====================================================================== # The following methods are called by the cmd.Cmd base class # All the commands are in alphabetic order @@ -339,6 +350,7 @@ def do_down(self, arg): do_d = do_down def do_EOF(self, arg): + self.message('') raise BdbxQuit("quit") def do_next(self, arg): @@ -347,6 +359,32 @@ def do_next(self, arg): do_n = do_next + def do_p(self, arg): + """p expression + + Print the value of the expression. + """ + try: + val = self._getval(arg) + self.message(repr(val)) + except: + # error message is printed + pass + return False + + def do_pp(self, arg): + """pp expression + + Pretty-print the value of the expression. + """ + try: + val = self._getval(arg) + self.message(pprint.pformat(val)) + except: + # error message is printed + pass + return False + def do_quit(self, arg): raise BdbxQuit("quit") @@ -406,6 +444,14 @@ def break_here(): pdb = Pdbx() pdb.break_here(sys._getframe().f_back) +_usage = """\ +usage: pdbx.py [-m module | pyfile] [arg] ... + +Debug the Python program given by pyfile. Alternatively, +an executable module or package to debug can be specified using +the -m switch. 
+""" + def main(): parser = argparse.ArgumentParser() @@ -422,10 +468,19 @@ def main(): sys.argv[:] = commands else: # Show help message - parser.print_help() + print(_usage) + sys.exit(2) pdbx = Pdbx() - pdbx._run_target(target) + while True: + try: + pdbx._run_target(target) + except SystemExit as e: + # In most cases SystemExit does not warrant a post-mortem session. + print("The program exited via sys.exit(). Exit status:", end=' ') + print(e) + except BdbxQuit: + break if __name__ == '__main__': diff --git a/Lib/test/test_pdbx.py b/Lib/test/test_pdbx.py index 3cc5e64009daf6..80668fbb002434 100644 --- a/Lib/test/test_pdbx.py +++ b/Lib/test/test_pdbx.py @@ -1,6 +1,13 @@ -from pdbx import Pdbx import doctest +import os +import pdbx +import subprocess import sys +import textwrap +import unittest + +from test import support +from test.support import os_helper from test.test_doctest import _FakeInput class PdbxTestInput(object): @@ -208,3 +215,167 @@ def load_tests(loader, tests, pattern): from test import test_pdbx tests.addTest(doctest.DocTestSuite(test_pdbx)) return tests + + +@support.requires_subprocess() +class PdbxTestCase(unittest.TestCase): + def tearDown(self): + os_helper.unlink(os_helper.TESTFN) + + @unittest.skipIf(sys.flags.safe_path, + 'PYTHONSAFEPATH changes default sys.path') + def _run_pdbx(self, pdbx_args, commands, expected_returncode=0): + self.addCleanup(os_helper.rmtree, '__pycache__') + cmd = [sys.executable, '-m', 'pdbx'] + pdbx_args + with subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stdin=subprocess.PIPE, + stderr=subprocess.STDOUT, + env = {**os.environ, 'PYTHONIOENCODING': 'utf-8'} + ) as proc: + stdout, stderr = proc.communicate(str.encode(commands)) + stdout = stdout and bytes.decode(stdout) + stderr = stderr and bytes.decode(stderr) + self.assertEqual( + proc.returncode, + expected_returncode, + f"Unexpected return code\nstdout: {stdout}\nstderr: {stderr}" + ) + return stdout, stderr + + def run_pdbx_script(self, script, 
commands, expected_returncode=0): + """Run 'script' lines with pdbx and the pdbx 'commands'.""" + filename = 'main.py' + with open(filename, 'w') as f: + f.write(textwrap.dedent(script)) + self.addCleanup(os_helper.unlink, filename) + return self._run_pdbx([filename], commands, expected_returncode) + + def run_pdbx_module(self, script, commands): + """Runs the script code as part of a module""" + self.module_name = 't_main' + os_helper.rmtree(self.module_name) + main_file = self.module_name + '/__main__.py' + init_file = self.module_name + '/__init__.py' + os.mkdir(self.module_name) + with open(init_file, 'w') as f: + pass + with open(main_file, 'w') as f: + f.write(textwrap.dedent(script)) + self.addCleanup(os_helper.rmtree, self.module_name) + return self._run_pdbx(['-m', self.module_name], commands) + + def test_run_module(self): + script = """print("SUCCESS")""" + commands = """ + continue + quit + """ + stdout, stderr = self.run_pdbx_module(script, commands) + self.assertTrue(any("SUCCESS" in l for l in stdout.splitlines()), stdout) + + def test_module_is_run_as_main(self): + script = """ + if __name__ == '__main__': + print("SUCCESS") + """ + commands = """ + continue + quit + """ + stdout, stderr = self.run_pdbx_module(script, commands) + self.assertTrue(any("SUCCESS" in l for l in stdout.splitlines()), stdout) + + def test_run_pdbx_with_pdbx(self): + commands = """ + c + quit + """ + stdout, stderr = self._run_pdbx(["-m", "pdbx"], commands) + self.assertIn( + pdbx._usage, + stdout.replace('\r', '') # remove \r for windows + ) + + def test_module_without_a_main(self): + module_name = 't_main' + os_helper.rmtree(module_name) + init_file = module_name + '/__init__.py' + os.mkdir(module_name) + with open(init_file, 'w'): + pass + self.addCleanup(os_helper.rmtree, module_name) + stdout, stderr = self._run_pdbx( + ['-m', module_name], "", expected_returncode=1 + ) + self.assertIn("ImportError: No module named t_main.__main__", + stdout.splitlines()) + + def 
test_package_without_a_main(self): + pkg_name = 't_pkg' + module_name = 't_main' + os_helper.rmtree(pkg_name) + modpath = pkg_name + '/' + module_name + os.makedirs(modpath) + with open(modpath + '/__init__.py', 'w'): + pass + self.addCleanup(os_helper.rmtree, pkg_name) + stdout, stderr = self._run_pdbx( + ['-m', modpath.replace('/', '.')], "", expected_returncode=1 + ) + self.assertIn( + "'t_pkg.t_main' is a package and cannot be directly executed", + stdout) + + def test_blocks_at_first_code_line(self): + script = """ + #This is a comment, on line 2 + + print("SUCCESS") + """ + commands = """ + quit + """ + stdout, stderr = self.run_pdbx_module(script, commands) + self.assertTrue(any("__main__.py:4" + in l for l in stdout.splitlines()), stdout) + self.assertTrue(any("<module>" + in l for l in stdout.splitlines()), stdout) + + def test_relative_imports(self): + self.module_name = 't_main' + os_helper.rmtree(self.module_name) + main_file = self.module_name + '/__main__.py' + init_file = self.module_name + '/__init__.py' + module_file = self.module_name + '/module.py' + self.addCleanup(os_helper.rmtree, self.module_name) + os.mkdir(self.module_name) + with open(init_file, 'w') as f: + f.write(textwrap.dedent(""" + top_var = "VAR from top" + """)) + with open(main_file, 'w') as f: + f.write(textwrap.dedent(""" + from . import top_var + from .module import var + from . import module + pass # We'll stop here and print the vars + """)) + with open(module_file, 'w') as f: + f.write(textwrap.dedent(""" + var = "VAR from module" + var2 = "second var" + """)) + commands = """ + b 5 + c + p top_var + p var + p module.var2 + quit + """ + stdout, _ = self._run_pdbx(['-m', self.module_name], commands) + self.assertTrue(any("VAR from module" in l for l in stdout.splitlines()), stdout) + self.assertTrue(any("VAR from top" in l for l in stdout.splitlines())) + self.assertTrue(any("second var" in l for l in stdout.splitlines()))