Skip to content

Index

code_context_agent.tools

Custom tools package for code context analysis.

This package provides all tools used by the code context agent: - Discovery: File manifests, repomix bundles, ripgrep search - LSP: Language server operations for semantic analysis - ast-grep: Structural code search with rule packs - Shell: Bounded command execution

CommandResult

Bases: TypedDict

Result of a shell command execution.

ToolResult

Bases: FrozenModel

Standardized result structure for tool responses.

Provides a consistent JSON serialization pattern for tool outputs.

Example

result = ToolResult(status="success", data={"count": 42}) return result.to_json() '{"status": "success", "data": {"count": 42}}'

result = ToolResult.error("File not found") return result.to_json() '{"status": "error", "error": "File not found"}'

to_json

to_json()

Serialize to JSON string, omitting None values.

Source code in src/code_context_agent/tools/shell.py
def to_json(self) -> str:
    """Serialize this result to a JSON string, dropping unset fields."""
    # Start from the mandatory status field and fold in any payload data.
    payload: dict[str, Any] = {"status": self.status, **(self.data or {})}
    # Only surface an "error" key when a message is actually present.
    if self.error_message:
        payload["error"] = self.error_message
    return json.dumps(payload)

success classmethod

success(**data)

Create a success result with data.

Source code in src/code_context_agent/tools/shell.py
@classmethod
def success(cls, **data: Any) -> ToolResult:
    """Build a success result, attaching keyword payload when provided."""
    # Empty kwargs collapse to None so the data field stays unset.
    return cls(status="success", data=data or None)

error classmethod

error(message, **extra)

Create an error result.

Source code in src/code_context_agent/tools/shell.py
@classmethod
def error(cls, message: str, **extra: Any) -> ToolResult:
    """Build an error result from a message plus optional extra payload."""
    # Empty kwargs collapse to None so the data field stays unset.
    return cls(status="error", error_message=message, data=extra or None)

ValidationError

Bases: ValueError

Raised when input validation fails.

astgrep_inline_rule

astgrep_inline_rule(
    language,
    rule_yaml,
    repo_path,
    include_globs=None,
    max_results=100,
)

Run ast-grep with an inline YAML rule definition.

Use this for custom one-off patterns that aren't in the predefined rule packs.

Parameters:

Name Type Description Default
language str

Language identifier.

required
rule_yaml str

Inline YAML rule definition.

required
repo_path str

Repository root path.

required
include_globs list[str] | None

Paths to include.

None
max_results int

Maximum results.

100

Returns:

Type Description
str

JSON array of matches.

Example

rule = ''' ... id: find-fetch ... language: TypeScript ... rule: ... pattern: fetch($$ARGS) ... ''' result = astgrep_inline_rule("ts", rule, "/path/to/repo")

Source code in src/code_context_agent/tools/astgrep.py
@tool
def astgrep_inline_rule(  # noqa: C901
    language: str,
    rule_yaml: str,
    repo_path: str,
    include_globs: list[str] | None = None,
    max_results: int = 100,
) -> str:
    """Run ast-grep with an inline YAML rule definition.

    Use this for custom one-off patterns that aren't in the predefined
    rule packs.

    Args:
        language: Language identifier.
        rule_yaml: Inline YAML rule definition.
        repo_path: Repository root path.
        include_globs: Paths to include.
        max_results: Maximum results.

    Returns:
        JSON array of matches. On command failure with no matches, status
        is "error" and the ast-grep stderr text is included.

    Example:
        >>> rule = '''
        ... id: find-fetch
        ... language: TypeScript
        ... rule:
        ...   pattern: fetch($$ARGS)
        ... '''
        >>> result = astgrep_inline_rule("ts", rule, "/path/to/repo")
    """
    try:
        repo = validate_repo_path(repo_path)
    except ValidationError as e:
        return json.dumps({"status": "error", "error": str(e)})

    # Build command as an argv list (no shell) so rule_yaml and globs
    # cannot be interpreted by a shell.
    cmd_list = [
        "ast-grep",
        "scan",
        "--inline-rules",
        rule_yaml,
        "--json=stream",
    ]

    if include_globs:
        for glob in include_globs:
            try:
                validate_glob_pattern(glob)
            except ValidationError as e:
                return json.dumps({"status": "error", "error": str(e)})
            cmd_list.extend(["--globs", glob])

    cmd_list.append(str(repo))

    # Run directly without a shell; bound runtime with a timeout.
    try:
        proc_result = subprocess.run(
            cmd_list,
            cwd=str(repo),
            capture_output=True,
            text=True,
            timeout=120,
        )
        # Limit output lines after capture to avoid broken pipe; keep a
        # generous margin (10x) since not every stdout line is a match.
        stdout_lines = proc_result.stdout.split("\n")[: max_results * 10]
        result = {
            "status": "success" if proc_result.returncode == 0 else "error",
            "stdout": "\n".join(stdout_lines),
            "stderr": proc_result.stderr[:10000] if proc_result.stderr else "",
            "return_code": proc_result.returncode,
        }
    except subprocess.TimeoutExpired:
        result = {
            "status": "error",
            "stdout": "",
            "stderr": "Command timed out",
            "return_code": -1,
        }
    except (subprocess.SubprocessError, OSError) as e:
        result = {
            "status": "error",
            "stdout": "",
            "stderr": str(e),
            "return_code": -1,
        }

    # Parse streaming JSON output; skip blank or malformed lines.
    matches = []
    if result["stdout"]:
        for line in result["stdout"].strip().split("\n"):
            if not line:
                continue
            try:
                data = json.loads(line)
                matches.append(
                    {
                        "file": data.get("file", ""),
                        "range": data.get("range", {}),
                        "text": data.get("text", ""),
                    },
                )
                if len(matches) >= max_results:
                    break
            except json.JSONDecodeError:
                continue

    # A failed command with no parsable matches is an error, not "no matches":
    # previously this case was mislabeled "no_matches" and stderr was dropped,
    # hiding timeouts and bad rules from the caller.
    failed = result["return_code"] != 0 and not matches
    response = {
        "status": "error" if failed else "success",
        "language": language,
        "matches": matches,
        "match_count": len(matches),
    }
    if failed:
        response["error"] = result["stderr"] or "ast-grep command failed"
    return json.dumps(response)

astgrep_scan

astgrep_scan(
    language,
    pattern,
    repo_path,
    include_globs=None,
    exclude_globs=None,
    max_results=100,
)

Run ast-grep structural search with a pattern.

Performs AST-based structural code search, which is more precise than regex for finding code patterns like function calls, assignments, etc.

Parameters:

Name Type Description Default
language str

Language identifier ("ts", "tsx", "py", "js", "jsx").

required
pattern str

ast-grep pattern (e.g., "$OBJ.$METHOD($$ARGS)").

required
repo_path str

Repository root path.

required
include_globs list[str] | None

Paths to include (e.g., ["src/**", "apps/**"]).

None
exclude_globs list[str] | None

Paths to exclude (e.g., ["**/node_modules/**"]).

None
max_results int

Maximum results to return.

100

Returns:

Type Description
str

JSON array of matches with file, range, and matched text.

Example

result = astgrep_scan("ts", "$DB.query($$ARGS)", "/path/to/repo") result = astgrep_scan("py", "$OBJ.execute($$SQL)", "/path/to/repo", include_globs=["src/**"])

Source code in src/code_context_agent/tools/astgrep.py
@tool
def astgrep_scan(  # noqa: C901
    language: str,
    pattern: str,
    repo_path: str,
    include_globs: list[str] | None = None,
    exclude_globs: list[str] | None = None,
    max_results: int = 100,
) -> str:
    """Run ast-grep structural search with a pattern.

    Performs AST-based structural code search, which is more precise than
    regex for finding code patterns like function calls, assignments, etc.

    Args:
        language: Language identifier ("ts", "tsx", "py", "js", "jsx").
        pattern: ast-grep pattern (e.g., "$OBJ.$METHOD($$ARGS)").
        repo_path: Repository root path.
        include_globs: Paths to include (e.g., ["src/**", "apps/**"]).
        exclude_globs: Paths to exclude (e.g., ["**/node_modules/**"]).
        max_results: Maximum results to return.

    Returns:
        JSON array of matches with file, range, and matched text. On command
        failure with no matches, status is "error" and stderr is included.

    Example:
        >>> result = astgrep_scan("ts", "$DB.query($$ARGS)", "/path/to/repo")
        >>> result = astgrep_scan("py", "$OBJ.execute($$SQL)", "/path/to/repo", include_globs=["src/**"])
    """
    try:
        repo = validate_repo_path(repo_path)
    except ValidationError as e:
        return json.dumps({"status": "error", "error": str(e)})

    # Build command as an argv list (no shell) so the pattern and globs
    # cannot be interpreted by a shell.
    cmd_list = [
        "ast-grep",
        "run",
        "-l",
        language,
        "-p",
        pattern,
        "--json=stream",
    ]

    # Validate and append include globs; excludes use ast-grep's "!" prefix.
    if include_globs:
        for glob in include_globs:
            try:
                validate_glob_pattern(glob)
            except ValidationError as e:
                return json.dumps({"status": "error", "error": str(e)})
            cmd_list.extend(["--globs", glob])

    if exclude_globs:
        for glob in exclude_globs:
            try:
                validate_glob_pattern(glob)
            except ValidationError as e:
                return json.dumps({"status": "error", "error": str(e)})
            cmd_list.extend(["--globs", f"!{glob}"])

    cmd_list.append(str(repo))

    # Run directly without a shell; bound runtime with a timeout.
    try:
        proc_result = subprocess.run(
            cmd_list,
            cwd=str(repo),
            capture_output=True,
            text=True,
            timeout=120,
        )
        # Limit output lines after capture to avoid broken pipe; keep a
        # generous margin (10x) since not every stdout line is a match.
        stdout_lines = proc_result.stdout.split("\n")[: max_results * 10]
        result = {
            "status": "success" if proc_result.returncode == 0 else "error",
            "stdout": "\n".join(stdout_lines),
            "stderr": proc_result.stderr[:10000] if proc_result.stderr else "",
            "return_code": proc_result.returncode,
        }
    except subprocess.TimeoutExpired:
        result = {
            "status": "error",
            "stdout": "",
            "stderr": "Command timed out",
            "return_code": -1,
        }
    except (subprocess.SubprocessError, OSError) as e:
        result = {
            "status": "error",
            "stdout": "",
            "stderr": str(e),
            "return_code": -1,
        }

    # Parse streaming JSON output; skip blank or malformed lines.
    matches = []
    if result["stdout"]:
        for line in result["stdout"].strip().split("\n"):
            if not line:
                continue
            try:
                data = json.loads(line)
                matches.append(
                    {
                        "file": data.get("file", ""),
                        "range": data.get("range", {}),
                        "text": data.get("text", ""),
                        "rule_id": data.get("ruleId", ""),
                    },
                )
                if len(matches) >= max_results:
                    break
            except json.JSONDecodeError:
                continue

    # A failed command with no parsable matches is an error, not "no matches":
    # previously this case was mislabeled "no_matches" and stderr was dropped,
    # hiding timeouts and bad patterns from the caller.
    failed = result["return_code"] != 0 and not matches
    response = {
        "status": "error" if failed else "success",
        "language": language,
        "pattern": pattern,
        "matches": matches,
        "match_count": len(matches),
    }
    if failed:
        response["error"] = result["stderr"] or "ast-grep command failed"
    return json.dumps(response)

astgrep_scan_rule_pack

astgrep_scan_rule_pack(
    rule_pack,
    repo_path,
    include_globs=None,
    exclude_globs=None,
    max_results=200,
)

Run ast-grep with a predefined rule pack for business logic detection.

Rule packs are YAML files with multiple rules for detecting specific patterns like DB calls, state mutations, and API interactions.

Available rule packs: - "ts_business_logic": TypeScript/JavaScript DB, state, API patterns - "py_business_logic": Python DB, state, HTTP patterns

Parameters:

Name Type Description Default
rule_pack str

Name of the rule pack to use.

required
repo_path str

Repository root path.

required
include_globs list[str] | None

Paths to include.

None
exclude_globs list[str] | None

Paths to exclude.

None
max_results int

Maximum results to return.

200

Returns:

Type Description
str

JSON array of matches grouped by rule ID.

Example

result = astgrep_scan_rule_pack("ts_business_logic", "/path/to/repo")

Source code in src/code_context_agent/tools/astgrep.py
@tool
def astgrep_scan_rule_pack(  # noqa: C901
    rule_pack: str,
    repo_path: str,
    include_globs: list[str] | None = None,
    exclude_globs: list[str] | None = None,
    max_results: int = 200,
) -> str:
    """Run ast-grep with a predefined rule pack for business logic detection.

    Rule packs are YAML files with multiple rules for detecting specific
    patterns like DB calls, state mutations, and API interactions.

    Available rule packs:
    - "ts_business_logic": TypeScript/JavaScript DB, state, API patterns
    - "py_business_logic": Python DB, state, HTTP patterns

    Args:
        rule_pack: Name of the rule pack to use.
        repo_path: Repository root path.
        include_globs: Paths to include.
        exclude_globs: Paths to exclude.
        max_results: Maximum results to return.

    Returns:
        JSON array of matches grouped by rule ID. On command failure with no
        matches, status is "error" and stderr is included.

    Example:
        >>> result = astgrep_scan_rule_pack("ts_business_logic", "/path/to/repo")
    """
    try:
        repo = validate_repo_path(repo_path)
    except ValidationError as e:
        return json.dumps({"status": "error", "error": str(e)})

    # Resolve the rule pack to a YAML file; list alternatives on a miss so
    # the caller can self-correct.
    rule_file = RULES_DIR / f"{rule_pack}.yml"
    if not rule_file.exists():
        available = [f.stem for f in RULES_DIR.glob("*.yml")]
        return json.dumps(
            {
                "status": "error",
                "error": f"Rule pack not found: {rule_pack}. Available: {available}",
            },
        )

    # Build command as an argv list (no shell).
    cmd_list = [
        "ast-grep",
        "scan",
        "--config",
        str(rule_file),
        "--json=stream",
    ]

    # Validate and append include globs; excludes use ast-grep's "!" prefix.
    if include_globs:
        for glob in include_globs:
            try:
                validate_glob_pattern(glob)
            except ValidationError as e:
                return json.dumps({"status": "error", "error": str(e)})
            cmd_list.extend(["--globs", glob])

    if exclude_globs:
        for glob in exclude_globs:
            try:
                validate_glob_pattern(glob)
            except ValidationError as e:
                return json.dumps({"status": "error", "error": str(e)})
            cmd_list.extend(["--globs", f"!{glob}"])

    cmd_list.append(str(repo))

    # Run directly without a shell; bound runtime with a timeout.
    try:
        proc_result = subprocess.run(
            cmd_list,
            cwd=str(repo),
            capture_output=True,
            text=True,
            timeout=120,
        )
        # Limit output lines after capture to avoid broken pipe; keep a
        # generous margin (10x) since not every stdout line is a match.
        stdout_lines = proc_result.stdout.split("\n")[: max_results * 10]
        result = {
            "status": "success" if proc_result.returncode == 0 else "error",
            "stdout": "\n".join(stdout_lines),
            "stderr": proc_result.stderr[:10000] if proc_result.stderr else "",
            "return_code": proc_result.returncode,
        }
    except subprocess.TimeoutExpired:
        result = {
            "status": "error",
            "stdout": "",
            "stderr": "Command timed out",
            "return_code": -1,
        }
    except (subprocess.SubprocessError, OSError) as e:
        result = {
            "status": "error",
            "stdout": "",
            "stderr": str(e),
            "return_code": -1,
        }

    # Parse streaming JSON output and group matches by their rule ID.
    matches_by_rule: dict[str, list[dict]] = {}
    total_count = 0

    if result["stdout"]:
        for line in result["stdout"].strip().split("\n"):
            if not line:
                continue
            try:
                data = json.loads(line)
                rule_id = data.get("ruleId", "unknown")
                match = {
                    "file": data.get("file", ""),
                    "range": data.get("range", {}),
                    "text": data.get("text", ""),
                    "message": data.get("message", ""),
                }
                if rule_id not in matches_by_rule:
                    matches_by_rule[rule_id] = []
                matches_by_rule[rule_id].append(match)
                total_count += 1
                if total_count >= max_results:
                    break
            except json.JSONDecodeError:
                continue

    # A failed command with no parsable matches is an error, not "no matches":
    # previously this case was mislabeled "no_matches" and stderr was dropped,
    # hiding timeouts and broken rule files from the caller.
    failed = result["return_code"] != 0 and not matches_by_rule
    response = {
        "status": "error" if failed else "success",
        "rule_pack": rule_pack,
        "matches_by_rule": matches_by_rule,
        "total_count": total_count,
        "rule_count": len(matches_by_rule),
    }
    if failed:
        response["error"] = result["stderr"] or "ast-grep command failed"
    return json.dumps(response)

create_file_manifest

create_file_manifest(repo_path)

Create ignore-aware file manifest using ripgrep.

USE THIS TOOL: As the FIRST step in any codebase analysis workflow. Creates a safe inventory of files without reading contents.

DO NOT USE: - If you already have a manifest from a previous call in this session - If .code-context/files.all.txt exists and is recent

Generates a list of all files in the repository, respecting .gitignore and skipping hidden/binary files. Output is written to .code-context/files.all.txt.

Parameters:

Name Type Description Default
repo_path str

Absolute path to the repository root.

required

Returns:

Type Description
str

JSON with:

str
  • manifest_path: Path to .code-context/files.all.txt
str
  • file_count: Number of files found (typical: 100-5000)

Output Size: ~100 bytes JSON + manifest file (~50 bytes per file path)

Common Errors
  • "rg not found": ripgrep not installed (install with: cargo install ripgrep)
  • Empty manifest: Check if repo_path is correct and contains files
  • Permission denied: Ensure read access to the repository
Example success

{"status": "success", "manifest_path": "/repo/.code-context/files.all.txt", "file_count": 847}

Source code in src/code_context_agent/tools/discovery.py
@tool
def create_file_manifest(repo_path: str) -> str:
    """Create ignore-aware file manifest using ripgrep.

    USE THIS TOOL: As the FIRST step in any codebase analysis workflow.
    Creates a safe inventory of files without reading contents.

    DO NOT USE:
    - If you already have a manifest from a previous call in this session
    - If .code-context/files.all.txt exists and is recent

    Generates a list of all files in the repository, respecting .gitignore
    and skipping hidden/binary files. Output is written to .code-context/files.all.txt.

    Args:
        repo_path: Absolute path to the repository root.

    Returns:
        JSON with:
        - manifest_path: Path to .code-context/files.all.txt
        - file_count: Number of files found (typical: 100-5000)

    Output Size: ~100 bytes JSON + manifest file (~50 bytes per file path)

    Common Errors:
        - "rg not found": ripgrep not installed (install with: cargo install ripgrep)
        - Empty manifest: Check if repo_path is correct and contains files
        - Permission denied: Ensure read access to the repository

    Example success:
        {"status": "success", "manifest_path": "/repo/.code-context/files.all.txt", "file_count": 847}
    """
    try:
        repo = validate_repo_path(repo_path)
    except ValidationError as e:
        return json.dumps({"status": "error", "error": str(e)})
    agent_dir = repo / DEFAULT_OUTPUT_DIR
    agent_dir.mkdir(exist_ok=True)
    manifest_path = agent_dir / "files.all.txt"

    # Use ripgrep --files which respects .gitignore.
    # Use sh -c for shell redirection (streams straight to disk instead of
    # through Python memory) with shlex.quote for path safety.
    result = run_command(
        ["sh", "-c", f"rg --files > {shlex.quote(str(manifest_path))}"],
        cwd=str(repo),
    )

    if result["status"] != "success":
        return json.dumps(
            {
                "status": "error",
                "error": result["stderr"],
            },
        )

    # Count files with an explicit context manager so the handle is closed
    # deterministically (the bare .open() here previously leaked the handle
    # to the garbage collector).
    try:
        with manifest_path.open() as fh:
            file_count = sum(1 for _ in fh)
    except OSError:
        file_count = 0

    logger.info(f"Created file manifest with {file_count} files")

    return json.dumps(
        {
            "status": "success",
            "manifest_path": str(manifest_path),
            "file_count": file_count,
        },
    )

read_file_bounded

read_file_bounded(file_path, max_lines=500, start_line=1)

Read a file with bounded output for safe analysis.

USE THIS TOOL: - To deeply read and understand business logic files identified by graph analysis, LSP, or AST-grep. Essential for Phase 6.5 (Deep Read). - To read a SINGLE specific file when you know the exact path - To inspect implementation details after finding via rg_search - To read configuration files (package.json, pyproject.toml, etc.) - When you need line numbers for subsequent LSP calls - For files >500 lines, paginate using start_line (e.g., read 1-500, then 501-1000)

DO NOT USE: - To read multiple files at once (use repomix_bundle instead) - For initial exploration before Phase 3 (use repomix_orientation first) - For files >500 lines without specifying start_line for pagination

Reads file contents with line limits to prevent token overflow. Includes line numbers formatted as " 123| code here".

Parameters:

Name Type Description Default
file_path str

Absolute path to the file.

required
max_lines int

Maximum lines to read (default 500, reduce for large files).

500
start_line int

Starting line number (1-indexed, use for pagination).

1

Returns:

Type Description
str

JSON with content (with line numbers), path, lines_read, and truncated flag.

Output Size: ~80 bytes per line average. 500 lines = ~40KB.

Common Errors
  • "File not found": Check path is absolute and file exists
  • "truncated": true: File has more lines, use start_line to paginate
  • UnicodeDecodeError: File is binary, not suitable for text reading
Example success

{"status": "success", "path": "/repo/src/main.py", "content": " 1| ...", "start_line": 1, "lines_read": 150, "truncated": false}

Example pagination (reading lines 500-1000): >>> read_file_bounded("/repo/large_file.py", max_lines=500, start_line=500)

Source code in src/code_context_agent/tools/discovery.py
@tool
def read_file_bounded(file_path: str, max_lines: int = 500, start_line: int = 1) -> str:
    """Read a file with bounded output for safe analysis.

    USE THIS TOOL:
    - To deeply read and understand business logic files identified by graph analysis,
      LSP, or AST-grep. Essential for Phase 6.5 (Deep Read).
    - To read a SINGLE specific file when you know the exact path
    - To inspect implementation details after finding via rg_search
    - To read configuration files (package.json, pyproject.toml, etc.)
    - When you need line numbers for subsequent LSP calls
    - For files >500 lines, paginate using start_line (e.g., read 1-500, then 501-1000)

    DO NOT USE:
    - To read multiple files at once (use repomix_bundle instead)
    - For initial exploration before Phase 3 (use repomix_orientation first)
    - For files >500 lines without specifying start_line for pagination

    Reads file contents with line limits to prevent token overflow.
    Includes line numbers formatted as "  123| code here".

    Args:
        file_path: Absolute path to the file.
        max_lines: Maximum lines to read (default 500, reduce for large files).
        start_line: Starting line number (1-indexed, use for pagination).

    Returns:
        JSON with content (with line numbers), path, lines_read, and truncated flag.

    Output Size: ~80 bytes per line average. 500 lines = ~40KB.

    Common Errors:
        - "File not found": Check path is absolute and file exists
        - "truncated": true: File has more lines, use start_line to paginate
        - UnicodeDecodeError: File is binary, not suitable for text reading

    Example success:
        {"status": "success", "path": "/repo/src/main.py", "content": "     1| ...",
         "start_line": 1, "lines_read": 150, "truncated": false}

    Example pagination (reading lines 500-1000):
        >>> read_file_bounded("/repo/large_file.py", max_lines=500, start_line=500)
    """
    try:
        path = validate_file_path(file_path)
    except ValidationError as e:
        return json.dumps({"status": "error", "error": str(e)})

    if not path.exists():
        return json.dumps(
            {
                "status": "error",
                "error": f"File not found: {path}",
            },
        )

    try:
        lines: list[str] = []
        truncated = False
        with path.open(encoding="utf-8", errors="replace") as f:
            for i, line in enumerate(f, 1):
                if i < start_line:
                    continue
                if i >= start_line + max_lines:
                    # A line actually exists beyond the window, so the output
                    # really is cut off. (The old `lines_read >= max_lines`
                    # check falsely reported truncation when the file ended
                    # exactly at the window boundary.)
                    truncated = True
                    break
                lines.append(f"{i:6d}| {line.rstrip()}")

        return json.dumps(
            {
                "status": "success",
                "path": str(path),
                "content": "\n".join(lines),
                "start_line": start_line,
                "lines_read": len(lines),
                "truncated": truncated,
            },
        )
    except (OSError, ValueError) as e:
        return json.dumps(
            {
                "status": "error",
                "error": str(e),
            },
        )

repomix_bundle

repomix_bundle(
    file_list_path,
    output_path,
    compress=True,
    include_diffs=False,
    include_logs=False,
    include_logs_count=50,
    split_size=None,
    truncate_base64=True,
    remove_comments=False,
)

Pack curated files into markdown context bundle.

USE THIS TOOL: When you have a curated list of file paths and want to bundle their contents into a single markdown file for analysis.

DO NOT USE: - For initial exploration (use repomix_orientation first) - If you don't have a file list yet (use write_file_list first)

Takes a list of file paths and bundles their contents into a single markdown file using repomix. The --stdin flag reads paths from the provided file list.

Parameters:

Name Type Description Default
file_list_path str

Path to file containing paths to pack (one per line).

required
output_path str

Output markdown file path.

required
compress bool

Use tree-sitter compression to reduce size.

True
include_diffs bool

Include git working tree + staged changes in the bundle.

False
include_logs bool

Include recent git commit history in the bundle.

False
include_logs_count int

Number of recent commits to include (only when include_logs=True).

50
split_size str | None

Split output into chunks of this size (e.g., "500kb", "2mb"). Useful for very large bundles that exceed context windows.

None
truncate_base64 bool

Truncate base64-encoded data to reduce token waste (default True).

True
remove_comments bool

Strip comments from source code for minimal structural output.

False

Returns:

Type Description
str

JSON with output path, file size, and status.

Output Size: Varies by file count and content. Compressed bundles are ~30-50% smaller.

Common Errors
  • "File list not found": Ensure file_list_path exists and has content
  • Timeout after 300s: Too many/large files, reduce scope or use split_size
  • "repomix not found": Install with npm install -g repomix
Example

result = repomix_bundle(".code-context/files.targeted.txt", ".code-context/CONTEXT.bundle.md") result = repomix_bundle( ... ".code-context/files.targeted.txt", ... ".code-context/CONTEXT.bundle.md", ... include_diffs=True, ... include_logs=True, ... include_logs_count=20, ... )

Source code in src/code_context_agent/tools/discovery.py
@tool
def repomix_bundle(  # noqa: C901
    file_list_path: str,
    output_path: str,
    compress: bool = True,
    include_diffs: bool = False,
    include_logs: bool = False,
    include_logs_count: int = 50,
    split_size: str | None = None,
    truncate_base64: bool = True,
    remove_comments: bool = False,
) -> str:
    """Pack curated files into markdown context bundle.

    USE THIS TOOL: When you have a curated list of file paths and want to
    bundle their contents into a single markdown file for analysis.

    DO NOT USE:
    - For initial exploration (use repomix_orientation first)
    - If you don't have a file list yet (use write_file_list first)

    Takes a list of file paths and bundles their contents into a single
    markdown file using repomix. The --stdin flag reads paths from the
    provided file list.

    Args:
        file_list_path: Path to file containing paths to pack (one per line).
        output_path: Output markdown file path.
        compress: Use tree-sitter compression to reduce size.
        include_diffs: Include git working tree + staged changes in the bundle.
        include_logs: Include recent git commit history in the bundle.
        include_logs_count: Number of recent commits to include (only when include_logs=True).
        split_size: Split output into chunks of this size (e.g., "500kb", "2mb").
            Useful for very large bundles that exceed context windows.
        truncate_base64: Truncate base64-encoded data to reduce token waste (default True).
        remove_comments: Strip comments from source code for minimal structural output.

    Returns:
        JSON with output path, file size, and status.

    Output Size: Varies by file count and content. Compressed bundles are ~30-50% smaller.

    Common Errors:
        - "File list not found": Ensure file_list_path exists and has content
        - Timeout after 300s: Too many/large files, reduce scope or use split_size
        - "repomix not found": Install with npm install -g repomix

    Example:
        >>> result = repomix_bundle(".code-context/files.targeted.txt", ".code-context/CONTEXT.bundle.md")
        >>> result = repomix_bundle(
        ...     ".code-context/files.targeted.txt",
        ...     ".code-context/CONTEXT.bundle.md",
        ...     include_diffs=True,
        ...     include_logs=True,
        ...     include_logs_count=20,
        ... )
    """
    # Validate both paths up front; either failure aborts with the same
    # error shape the other tools use.
    try:
        file_list = validate_file_path(file_list_path)
        output = validate_file_path(output_path, must_exist=False)
    except ValidationError as e:
        return json.dumps({"status": "error", "error": str(e)})

    if not file_list.exists():
        return json.dumps(
            {
                "status": "error",
                "error": f"File list not found: {file_list}",
            },
        )

    # Translate the boolean options into repomix CLI flags.
    args = ["--stdin", "--style", "markdown", "--output-show-line-numbers"]
    if compress:
        args.append("--compress")
    if include_diffs:
        args.append("--include-diffs")
    if include_logs:
        args += ["--include-logs", "--include-logs-count", str(include_logs_count)]
    if split_size is not None:
        args += ["--split-output", split_size]
    if truncate_base64:
        args.append("--truncate-base64")
    if remove_comments:
        args.append("--remove-comments")

    # Feed the curated path list to repomix over stdin (no shell involved).
    result = run_command(
        ["repomix", *args, "-o", str(output)],
        timeout=300,
        input_data=file_list.read_text(),
    )

    if result["status"] != "success":
        return json.dumps(
            {
                "status": "error",
                "error": result["stderr"],
                "stdout": result["stdout"],
            },
        )

    # Report the bundle size; treat a stat failure as zero rather than error.
    try:
        bundle_bytes = output.stat().st_size
    except OSError:
        bundle_bytes = 0

    logger.info(f"Created context bundle: {output} ({bundle_bytes} bytes)")

    return json.dumps(
        {
            "status": "success",
            "output_path": str(output),
            "file_size_bytes": bundle_bytes,
        },
    )

repomix_bundle_with_context

repomix_bundle_with_context(
    repo_path,
    output_path,
    include_patterns=None,
    compress=True,
    include_diffs=True,
    include_logs=True,
    include_logs_count=50,
    truncate_base64=True,
)

Bundle repository files with git context (diffs and logs).

USE THIS TOOL: When you need a comprehensive snapshot of a repository that includes both file contents and recent git activity. Combines file bundling with git diffs and commit history in a single call.

DO NOT USE: - For initial exploration (use repomix_orientation first) - If you only need file contents without git context (use repomix_bundle) - For very large repos without include_patterns (will be slow/huge)

Unlike repomix_bundle which reads from a file list via --stdin, this tool operates directly on a repo path with optional glob include patterns. It always includes git context (diffs and/or logs) to provide a change-aware view of the codebase.

Parameters:

Name Type Description Default
repo_path str

Absolute path to the repository root.

required
output_path str

Output markdown file path.

required
include_patterns str | None

Comma-separated glob patterns to include (e.g., "src/**/*.py,tests/**/*.py"). If None, includes all files (respecting .gitignore).

None
compress bool

Use tree-sitter compression to reduce size.

True
include_diffs bool

Include git working tree + staged changes (default True).

True
include_logs bool

Include recent git commit history (default True).

True
include_logs_count int

Number of recent commits to include (only when include_logs=True).

50
truncate_base64 bool

Truncate base64-encoded data to reduce token waste (default True).

True

Returns:

Type Description
str

JSON with output path, file size, and status.

Output Size
  • Small repos with few changes: ~50-200KB
  • Medium repos with active changes: ~200KB-1MB
  • Execution time: 10-120 seconds depending on repo size and history
Common Errors
  • "repomix not found": Install with npm install -g repomix
  • Timeout after 300s: Use include_patterns to narrow scope
  • Large output: Reduce include_logs_count or use include_patterns
Example

result = repomix_bundle_with_context( ... "/repo", ... ".code-context/CONTEXT.git-aware.md", ... include_patterns="src/**/*.py", ... include_logs_count=20, ... )

Source code in src/code_context_agent/tools/discovery.py
@tool
def repomix_bundle_with_context(
    repo_path: str,
    output_path: str,
    include_patterns: str | None = None,
    compress: bool = True,
    include_diffs: bool = True,
    include_logs: bool = True,
    include_logs_count: int = 50,
    truncate_base64: bool = True,
) -> str:
    """Bundle repository files together with git context (diffs and logs).

    USE THIS TOOL: When you need a comprehensive snapshot of a repository
    that includes both file contents and recent git activity. Combines
    file bundling with git diffs and commit history in a single call.

    DO NOT USE:
    - For initial exploration (use repomix_orientation first)
    - If you only need file contents without git context (use repomix_bundle)
    - For very large repos without include_patterns (will be slow/huge)

    Unlike repomix_bundle, which reads a file list via --stdin, this tool
    operates directly on a repo path with optional glob include patterns and
    always attaches git context (diffs and/or logs) for a change-aware view.

    Args:
        repo_path: Absolute path to the repository root.
        output_path: Output markdown file path.
        include_patterns: Comma-separated glob patterns to include
            (e.g., "src/**/*.py,tests/**/*.py"). None includes all files
            (respecting .gitignore).
        compress: Use tree-sitter compression to reduce size.
        include_diffs: Include git working tree + staged changes (default True).
        include_logs: Include recent git commit history (default True).
        include_logs_count: Number of recent commits (only when include_logs=True).
        truncate_base64: Truncate base64-encoded data to reduce token waste.

    Returns:
        JSON with output path, file size, and status.

    Common Errors:
        - "repomix not found": Install with npm install -g repomix
        - Timeout after 300s: Use include_patterns to narrow scope
        - Large output: Reduce include_logs_count or use include_patterns

    Example:
        >>> result = repomix_bundle_with_context(
        ...     "/repo",
        ...     ".code-context/CONTEXT.git-aware.md",
        ...     include_patterns="src/**/*.py",
        ...     include_logs_count=20,
        ... )
    """
    # Validate both paths up front; either failure short-circuits with the
    # corresponding error message.
    try:
        repo = validate_repo_path(repo_path)
        output = validate_file_path(output_path, must_exist=False)
    except ValidationError as e:
        return json.dumps({"status": "error", "error": str(e)})

    # Assemble the repomix invocation from the requested options.
    cmd = ["repomix", "--style", "markdown", "--output-show-line-numbers"]
    if compress:
        cmd.append("--compress")
    if include_diffs:
        cmd.append("--include-diffs")
    if include_logs:
        cmd += ["--include-logs", "--include-logs-count", str(include_logs_count)]
    if truncate_base64:
        cmd.append("--truncate-base64")
    if include_patterns:
        cmd += ["--include", include_patterns]
    cmd += ["-o", str(output), str(repo)]

    result = run_command(cmd, cwd=str(repo), timeout=300)
    if result["status"] != "success":
        return json.dumps(
            {
                "status": "error",
                "error": result["stderr"],
                "stdout": result["stdout"],
            },
        )

    # File size is informational only; a stat failure should not fail the call.
    try:
        file_size = output.stat().st_size
    except OSError:
        file_size = 0

    logger.info(f"Created git-aware context bundle: {output} ({file_size} bytes)")

    return json.dumps(
        {
            "status": "success",
            "output_path": str(output),
            "file_size_bytes": file_size,
        },
    )

repomix_compressed_signatures

repomix_compressed_signatures(
    repo_path, include_patterns=None, output_path=None
)

Extract code signatures and types from a repository using Tree-sitter compression.

Produces a minimal structural view: function/method signatures, class declarations, interface/type definitions, imports — with implementation bodies stripped. Also removes comments and empty lines for maximum token efficiency.

Supported languages: JavaScript, TypeScript, Python, Go, Rust, Java, C#, Ruby, PHP, Swift, C, C++, CSS, Solidity, Vue, Dart.

USE THIS TOOL: - For a quick structural overview of specific directories or file patterns - When you need to understand the API surface without reading implementations - To identify function signatures and types across a large codebase efficiently

DO NOT USE: - If you need full implementation details (use repomix_bundle) - For initial codebase overview (use repomix_orientation first) - For non-code files (compression only works on supported languages)

Parameters:

Name Type Description Default
repo_path str

Absolute path to the repository root.

required
include_patterns str | None

Comma-separated glob patterns to include (e.g., "src/**/*.py,lib/**/*.ts").

None
output_path str | None

Output path. Defaults to .code-context/CONTEXT.signatures.md

None

Returns:

Type Description
str

JSON with output path, file size, and status.

Output Size
  • Typically 60-80% smaller than full bundles due to body stripping + comment removal
  • Small repos: ~5-30KB
  • Medium repos: ~30-150KB
  • Execution time: 5-60 seconds
Common Errors
  • "repomix not found": Install with npm install -g repomix
  • Timeout after 180s: Use include_patterns to narrow scope
  • Empty output: No supported language files matched
Example

result = repomix_compressed_signatures("/repo", include_patterns="src/**/*.py") result = repomix_compressed_signatures("/repo") # All files

Source code in src/code_context_agent/tools/discovery.py
@tool
def repomix_compressed_signatures(
    repo_path: str,
    include_patterns: str | None = None,
    output_path: str | None = None,
) -> str:
    """Extract code signatures and type declarations via Tree-sitter compression.

    Produces a minimal structural view of the codebase: function/method
    signatures, class declarations, interface/type definitions, and imports,
    with implementation bodies stripped. Comments and empty lines are also
    removed for maximum token efficiency.

    Supported languages: JavaScript, TypeScript, Python, Go, Rust, Java, C#,
    Ruby, PHP, Swift, C, C++, CSS, Solidity, Vue, Dart.

    USE THIS TOOL:
    - For a quick structural overview of specific directories or file patterns
    - To understand the API surface without reading implementations
    - To identify signatures and types across a large codebase efficiently

    DO NOT USE:
    - If you need full implementation details (use repomix_bundle)
    - For initial codebase overview (use repomix_orientation first)
    - For non-code files (compression only works on supported languages)

    Args:
        repo_path: Absolute path to the repository root.
        include_patterns: Comma-separated glob patterns to include
            (e.g., "src/**/*.py,lib/**/*.ts").
        output_path: Output path. Defaults to .code-context/CONTEXT.signatures.md

    Returns:
        JSON with output path, file size, and status.

    Common Errors:
        - "repomix not found": Install with npm install -g repomix
        - Timeout after 180s: Use include_patterns to narrow scope
        - Empty output: No supported language files matched

    Example:
        >>> result = repomix_compressed_signatures("/repo", include_patterns="src/**/*.py")
        >>> result = repomix_compressed_signatures("/repo")  # All files
    """
    try:
        repo = validate_repo_path(repo_path)
    except ValidationError as e:
        return json.dumps({"status": "error", "error": str(e)})

    agent_dir = repo / DEFAULT_OUTPUT_DIR
    agent_dir.mkdir(exist_ok=True)

    # Default the output file into the agent's output directory.
    if output_path is None:
        output = agent_dir / "CONTEXT.signatures.md"
    else:
        output = Path(output_path).resolve()

    # Compression + comment/empty-line removal gives the smallest useful view.
    cmd = [
        "repomix",
        "--compress",
        "--remove-comments",
        "--remove-empty-lines",
        "--style",
        "markdown",
        "--output-show-line-numbers",
        "-o",
        str(output),
    ]
    if include_patterns:
        cmd += ["--include", include_patterns]
    cmd.append(str(repo))

    result = run_command(cmd, cwd=str(repo), timeout=180)
    if result["status"] != "success":
        return json.dumps(
            {
                "status": "error",
                "error": result["stderr"],
                "stdout": result["stdout"],
            },
        )

    # Size is informational; never fail the call on a stat error.
    try:
        file_size = output.stat().st_size
    except OSError:
        file_size = 0

    logger.info(f"Created compressed signatures: {output} ({file_size} bytes)")

    return json.dumps(
        {
            "status": "success",
            "output_path": str(output),
            "file_size_bytes": file_size,
        },
    )

repomix_json_export

repomix_json_export(repo_path, include_patterns=None)

Export repository structure as JSON for programmatic analysis.

USE THIS TOOL: When you need structured data about the repository rather than a human-readable markdown bundle. Useful for getting exact file counts, token distributions, and directory structure as machine-parseable data.

DO NOT USE: - For reading file contents (use repomix_bundle or read_file_bounded) - For initial high-level overview (use repomix_orientation) - If you only need file paths (use create_file_manifest)

Uses repomix --style json to produce structured output that can be parsed programmatically. The output includes file metadata without file contents (--no-files), keeping the output compact.

Parameters:

Name Type Description Default
repo_path str

Absolute path to the repository root.

required
include_patterns str | None

Comma-separated glob patterns to include (e.g., "src/**/*.py,tests/**/*.py").

None

Returns:

Type Description
str

JSON with output_path and parsed metadata (total_files, total_tokens).

Output Size: ~200 bytes JSON response + JSON file on disk (~1-50KB depending on repo).

Common Errors
  • "repomix not found": Install with npm install -g repomix
  • Timeout after 180s: Use include_patterns to narrow scope
  • JSON parse error: repomix output format may have changed
Example success

{"status": "success", "output_path": "/repo/.code-context/structure.json", "total_files": 247, "total_tokens": 185420}

Example

result = repomix_json_export("/repo", include_patterns="src/**/*.py,tests/**/*.py")

Source code in src/code_context_agent/tools/discovery.py
@tool
def repomix_json_export(repo_path: str, include_patterns: str | None = None) -> str:
    """Export repository structure as JSON for programmatic analysis.

    USE THIS TOOL: When you need structured, machine-parseable data about
    the repository (exact file counts, token distributions, directory
    structure) rather than a human-readable markdown bundle.

    DO NOT USE:
    - For reading file contents (use repomix_bundle or read_file_bounded)
    - For initial high-level overview (use repomix_orientation)
    - If you only need file paths (use create_file_manifest)

    Runs repomix with --style json and --no-files so the output contains
    file metadata without file contents, keeping it compact.

    Args:
        repo_path: Absolute path to the repository root.
        include_patterns: Comma-separated glob patterns to include
            (e.g., "src/**/*.py,tests/**/*.py").

    Returns:
        JSON with output_path and parsed metadata (total_files, total_tokens).

    Common Errors:
        - "repomix not found": Install with npm install -g repomix
        - Timeout after 180s: Use include_patterns to narrow scope
        - JSON parse error: repomix output format may have changed

    Example success:
        {"status": "success", "output_path": "/repo/.code-context/structure.json",
         "total_files": 247, "total_tokens": 185420}
    """
    try:
        repo = validate_repo_path(repo_path)
    except ValidationError as e:
        return json.dumps({"status": "error", "error": str(e)})

    agent_dir = repo / DEFAULT_OUTPUT_DIR
    agent_dir.mkdir(exist_ok=True)
    output_path = agent_dir / "structure.json"

    # --no-files keeps the export to metadata only.
    cmd = ["repomix", "--style", "json", "--no-files", "-o", str(output_path)]
    if include_patterns:
        cmd += ["--include", include_patterns]
    cmd.append(str(repo))

    result = run_command(cmd, cwd=str(repo), timeout=180)
    if result["status"] != "success":
        return json.dumps(
            {
                "status": "error",
                "error": result["stderr"],
                "stdout": result["stdout"],
            },
        )

    # Best-effort extraction of summary metadata from the repomix JSON;
    # parse failures are logged but do not fail the export.
    total_files = 0
    total_tokens = 0
    try:
        data = json.loads(output_path.read_text(encoding="utf-8"))
        if isinstance(data, dict):
            # repomix has used both camelCase and snake_case key styles.
            total_files = data.get("totalFiles", data.get("total_files", 0))
            total_tokens = data.get("totalTokens", data.get("total_tokens", 0))
    except (OSError, json.JSONDecodeError, KeyError) as e:
        logger.warning(f"Could not parse repomix JSON metadata: {e}")

    logger.info(f"Exported JSON structure: {output_path} ({total_files} files, {total_tokens} tokens)")

    return json.dumps(
        {
            "status": "success",
            "output_path": str(output_path),
            "total_files": total_files,
            "total_tokens": total_tokens,
        },
    )

repomix_orientation

repomix_orientation(
    repo_path, token_threshold=300, max_file_count=10000
)

Generate token-aware orientation snapshot without file contents.

USE THIS TOOL: After create_file_manifest to understand codebase structure and identify high-complexity areas via token distribution.

DO NOT USE: - If repo has >10K files (will auto-skip with recommendation) - If you only need to find specific files (use rg_search instead) - If .code-context/CONTEXT.orientation.md exists and repo hasn't changed

Uses repomix to create a metadata overview including directory structure and token distribution tree. Helps identify where code complexity lies without bundling actual content.

Parameters:

Name Type Description Default
repo_path str

Absolute path to the repository root.

required
token_threshold int

Minimum tokens to show in tree (filters noise).

300
max_file_count int

Maximum files allowed before skipping (default 10000).

10000

Returns:

Type Description
str

JSON with output path and status, or skipped status for large repos.

Output Size
  • Small repos (<500 files): ~5-20KB markdown
  • Medium repos (500-2000 files): ~20-100KB markdown
  • Large repos (2000-10000 files): ~100-500KB markdown
  • Execution time: 5-60 seconds depending on repo size
Common Errors
  • "repomix not found": Install with npm install -g repomix
  • "skipped" status: Repo exceeds max_file_count, use --include patterns
  • Timeout after 180s: Repo too large, reduce scope with glob patterns
Example success

{"status": "success", "output_path": "/repo/.code-context/CONTEXT.orientation.md"}

Example skipped

{"status": "skipped", "reason": "Repository has 15000 files (max: 10000)"}

Source code in src/code_context_agent/tools/discovery.py
@tool
def repomix_orientation(
    repo_path: str,
    token_threshold: int = 300,
    max_file_count: int = 10000,
) -> str:
    """Generate a token-aware orientation snapshot without file contents.

    USE THIS TOOL: After create_file_manifest to understand codebase structure
    and identify high-complexity areas via token distribution.

    DO NOT USE:
    - If repo has >10K files (will auto-skip with recommendation)
    - If you only need to find specific files (use rg_search instead)
    - If .code-context/CONTEXT.orientation.md exists and repo hasn't changed

    Runs repomix to build a metadata overview (directory structure plus a
    token distribution tree) so you can see where complexity lives without
    bundling any actual content.

    Args:
        repo_path: Absolute path to the repository root.
        token_threshold: Minimum tokens to show in tree (filters noise).
        max_file_count: Maximum files allowed before skipping (default 10000).

    Returns:
        JSON with output path and status, or skipped status for large repos.

    Common Errors:
        - "repomix not found": Install with npm install -g repomix
        - "skipped" status: Repo exceeds max_file_count, use --include patterns
        - Timeout after 180s: Repo too large, reduce scope with glob patterns

    Example success:
        {"status": "success", "output_path": "/repo/.code-context/CONTEXT.orientation.md"}

    Example skipped:
        {"status": "skipped", "reason": "Repository has 15000 files (max: 10000)"}
    """
    try:
        repo = validate_repo_path(repo_path)
    except ValidationError as e:
        return json.dumps({"status": "error", "error": str(e)})

    agent_dir = repo / DEFAULT_OUTPUT_DIR
    agent_dir.mkdir(exist_ok=True)
    output_path = agent_dir / "CONTEXT.orientation.md"

    # Pre-check the file count so we never start a long repomix run on a
    # huge repo. cwd is used instead of embedding the path in the command
    # to avoid shell escaping issues.
    count_result = run_command(
        ["sh", "-c", "rg --files | wc -l"],
        cwd=str(repo),
        timeout=10,
    )

    file_count: int | None = None
    if count_result["status"] == "success":
        try:
            file_count = int(count_result["stdout"].strip())
        except ValueError:
            file_count = None  # Unparseable count: proceed without the guard.

    if file_count is not None and file_count > max_file_count:
        logger.warning(f"Repository has {file_count} files, exceeding max of {max_file_count}")
        return json.dumps(
            {
                "status": "skipped",
                "reason": f"Repository has {file_count} files (max: {max_file_count})",
                "recommendation": "Use --include patterns to limit scope",
            },
        )

    # --no-files: metadata only; --token-count-tree filters entries below the
    # requested token threshold.
    cmd = [
        "repomix",
        "--no-files",
        "--style",
        "markdown",
        "--token-count-tree",
        str(token_threshold),
        "-o",
        str(output_path),
        str(repo),
    ]

    result = run_command(cmd, cwd=str(repo), timeout=180)
    if result["status"] != "success":
        return json.dumps(
            {
                "status": "error",
                "error": result["stderr"],
                "stdout": result["stdout"],
            },
        )

    logger.info(f"Created orientation snapshot: {output_path}")

    return json.dumps(
        {
            "status": "success",
            "output_path": str(output_path),
        },
    )

repomix_split_bundle

repomix_split_bundle(
    file_list_path,
    output_dir,
    max_size="500kb",
    compress=True,
)

Pack files into multiple split bundles for large codebases.

When a codebase is too large for a single context window, this tool splits the output into numbered files (e.g., output.1.md, output.2.md).

USE THIS TOOL: - When a previous repomix_bundle call produced output exceeding context limits - For large codebases where you want to process files in manageable chunks - When you need to parallelize analysis across multiple context windows

DO NOT USE: - For small repos that fit in a single bundle (use repomix_bundle) - For initial exploration (use repomix_orientation first) - If you don't have a file list yet (use write_file_list first)

Parameters:

Name Type Description Default
file_list_path str

Path to file containing paths to pack (one per line).

required
output_dir str

Directory for split output files.

required
max_size str

Maximum size per file (e.g., "500kb", "1mb", "2mb").

'500kb'
compress bool

Use tree-sitter compression.

True

Returns:

Type Description
str

JSON with output directory, file count, and individual file paths.

Output Size: Each split file will be at most max_size. Total output depends on input.

Common Errors
  • "File list not found": Ensure file_list_path exists and has content
  • Timeout after 300s: Reduce the number of files in the list
  • "repomix not found": Install with npm install -g repomix
Example

result = repomix_split_bundle(".code-context/files.all.txt", ".code-context/splits/", max_size="1mb")

Source code in src/code_context_agent/tools/discovery.py
@tool
def repomix_split_bundle(
    file_list_path: str,
    output_dir: str,
    max_size: str = "500kb",
    compress: bool = True,
) -> str:
    """Pack files into multiple split bundles for large codebases.

    When a codebase is too large for a single context window, this tool
    splits the output into numbered files (e.g., output.1.md, output.2.md).

    USE THIS TOOL:
    - When a previous repomix_bundle call produced output exceeding context limits
    - For large codebases where you want to process files in manageable chunks
    - When you need to parallelize analysis across multiple context windows

    DO NOT USE:
    - For small repos that fit in a single bundle (use repomix_bundle)
    - For initial exploration (use repomix_orientation first)
    - If you don't have a file list yet (use write_file_list first)

    Args:
        file_list_path: Path to file containing paths to pack (one per line).
        output_dir: Directory for split output files.
        max_size: Maximum size per file (e.g., "500kb", "1mb", "2mb").
        compress: Use tree-sitter compression.

    Returns:
        JSON with output directory, file count, and individual file paths.

    Output Size: Each split file will be at most max_size. Total output depends on input.

    Common Errors:
        - "File list not found": Ensure file_list_path exists and has content
        - Timeout after 300s: Reduce the number of files in the list
        - "repomix not found": Install with npm install -g repomix

    Example:
        >>> result = repomix_split_bundle(".code-context/files.all.txt", ".code-context/splits/", max_size="1mb")
    """
    try:
        file_list = validate_file_path(file_list_path)
    except ValidationError as e:
        return json.dumps({"status": "error", "error": str(e)})
    try:
        # output_dir is a directory, not a file, but the traversal validation
        # still applies. Previously a ValidationError here was silently
        # swallowed, which defeated the check entirely — fail loudly instead.
        validate_file_path(output_dir, must_exist=False)
    except ValidationError as e:
        return json.dumps({"status": "error", "error": str(e)})
    out_dir = Path(output_dir).resolve()

    if not file_list.exists():
        return json.dumps(
            {
                "status": "error",
                "error": f"File list not found: {file_list}",
            },
        )

    out_dir.mkdir(parents=True, exist_ok=True)

    # repomix --split-output writes numbered files into the output directory.
    # We set -o to a base path inside output_dir; repomix appends .1.md, .2.md, etc.
    base_output = out_dir / "output.md"

    # Build repomix argument list
    repomix_parts = [
        "--stdin",
        "--style",
        "markdown",
        "--output-show-line-numbers",
        "--split-output",
        max_size,
    ]

    if compress:
        repomix_parts.append("--compress")

    # Read file list content and pipe via stdin (no shell)
    file_list_content = file_list.read_text()
    cmd = ["repomix", *repomix_parts, "-o", str(base_output)]

    result = run_command(cmd, timeout=300, input_data=file_list_content)

    if result["status"] != "success":
        return json.dumps(
            {
                "status": "error",
                "error": result["stderr"],
                "stdout": result["stdout"],
            },
        )

    # List the resulting split files. NOTE(review): this also picks up any
    # pre-existing .md files from earlier runs into the same directory.
    split_files = sorted(str(p) for p in out_dir.iterdir() if p.is_file() and p.suffix == ".md")

    logger.info(f"Created {len(split_files)} split bundles in {out_dir}")

    return json.dumps(
        {
            "status": "success",
            "output_dir": str(out_dir),
            "file_count": len(split_files),
            "files": split_files,
        },
    )
rg_search(
    pattern,
    repo_path,
    glob=None,
    file_type=None,
    max_count=100,
    context_lines=0,
    count_only=False,
)

Search for pattern in repository using ripgrep.

USE THIS TOOL: - To find entrypoints (e.g., "def main", "createServer", "app.listen") - To locate specific functions, classes, or patterns - To discover imports and dependencies - When you know WHAT to search for but not WHERE - With count_only=True for precise occurrence counts across the entire codebase

DO NOT USE: - For listing all files (use create_file_manifest instead) - For reading file contents (use read_file_bounded instead) - For structural analysis (use lsp_document_symbols instead)

Parameters:

Name Type Description Default
pattern str

Regex pattern to search for.

required
repo_path str

Repository root path.

required
glob str | None

Optional glob filter (e.g., "*.py", "src/**/*.ts").

None
file_type str | None

Optional file type (e.g., "py", "ts", "js").

None
max_count int

Maximum matches to return per file (default 100).

100
context_lines int

Lines of context around matches (0-5 recommended).

0
count_only bool

Return only match counts per file (no match details). Uses rg --count for exact totals without truncation.

False

Returns:

Type Description
str

JSON with matches array containing path, line_number, and lines.

str

When count_only=True: JSON with total_count and per-file counts.

~200 bytes per match. Results capped at 500 lines.

count_only mode: ~50 bytes per file, no cap.

Pattern Tips
  • Literal strings: "createServer" (no regex escaping needed)
  • Function definitions: "def \w+\(" or "function \w+\("
  • Class definitions: "class \w+"
  • Imports: "^import|^from .* import"
  • Case insensitive: Use "(?i)pattern"
Common Errors
  • "rg not found": ripgrep not installed
  • Empty matches with valid pattern: Try broader glob or check file_type
  • Regex syntax error: Escape special chars like ( ) [ ] { }
Example success

{"status": "success", "pattern": "def main", "matches": [...], "match_count": 3}

Example count_only

{"status": "success", "pattern": "TODO", "total_count": 42, "files": {"src/main.py": 12, "src/utils.py": 30}, "file_count": 2}

Example searches

rg_search("def main", "/repo", glob="*.py") # Python entrypoints rg_search("createServer", "/repo", file_type="ts") # TS server setup rg_search("TODO|FIXME", "/repo", count_only=True) # Exact count across repo

Source code in src/code_context_agent/tools/discovery.py
@tool
def rg_search(  # noqa: C901
    pattern: str,
    repo_path: str,
    glob: str | None = None,
    file_type: str | None = None,
    max_count: int = 100,
    context_lines: int = 0,
    count_only: bool = False,
) -> str:
    """Search for pattern in repository using ripgrep.

    USE THIS TOOL:
    - To find entrypoints (e.g., "def main", "createServer", "app.listen")
    - To locate specific functions, classes, or patterns
    - To discover imports and dependencies
    - When you know WHAT to search for but not WHERE
    - With count_only=True for precise occurrence counts across the entire codebase

    DO NOT USE:
    - For listing all files (use create_file_manifest instead)
    - For reading file contents (use read_file_bounded instead)
    - For structural analysis (use lsp_document_symbols instead)

    Args:
        pattern: Regex pattern to search for.
        repo_path: Repository root path.
        glob: Optional glob filter (e.g., "*.py", "src/**/*.ts").
        file_type: Optional file type (e.g., "py", "ts", "js").
        max_count: Maximum matches to return per file (default 100).
        context_lines: Lines of context around matches (0-5 recommended).
        count_only: Return only match counts per file (no match details).
            Uses rg --count for exact totals without truncation.

    Returns:
        JSON with matches array containing path, line_number, and lines.
        When count_only=True: JSON with total_count and per-file counts.

    Output Size: ~200 bytes per match. Results capped at 500 lines.
        count_only mode: ~50 bytes per file, no cap.

    Pattern Tips:
        - Literal strings: "createServer" (no regex escaping needed)
        - Function definitions: "def \\w+\\(" or "function \\w+\\("
        - Class definitions: "class \\w+"
        - Imports: "^import|^from .* import"
        - Case insensitive: Use "(?i)pattern"

    Common Errors:
        - "rg not found": ripgrep not installed
        - Empty matches with valid pattern: Try broader glob or check file_type
        - Regex syntax error: Escape special chars like ( ) [ ] { }

    Example success:
        {"status": "success", "pattern": "def main", "matches": [...], "match_count": 3}

    Example count_only:
        {"status": "success", "pattern": "TODO", "total_count": 42,
         "files": {"src/main.py": 12, "src/utils.py": 30}, "file_count": 2}

    Example searches:
        >>> rg_search("def main", "/repo", glob="*.py")  # Python entrypoints
        >>> rg_search("createServer", "/repo", file_type="ts")  # TS server setup
        >>> rg_search("TODO|FIXME", "/repo", count_only=True)  # Exact count across repo
    """
    try:
        repo = validate_repo_path(repo_path)
    except ValidationError as e:
        return json.dumps({"status": "error", "error": str(e)})
    try:
        validate_search_pattern(pattern)
    except ValidationError as e:
        return json.dumps({"status": "error", "error": str(e)})

    if count_only:
        return _rg_count(pattern, repo, glob=glob, file_type=file_type)

    # Build an argv list and run rg directly (shell=False). The previous
    # implementation joined shlex-quoted parts and ran them through `sh -c`,
    # which contradicted its own "no shell" comment and meant a pattern
    # beginning with "-" (e.g. "-foo") was parsed by rg as a flag. Passing a
    # list removes the shell layer entirely, and "--" stops option parsing
    # so any pattern is treated as a pattern.
    cmd = ["rg", "--json", "-m", str(max_count)]
    if glob:
        cmd.extend(["-g", glob])
    if file_type:
        cmd.extend(["-t", file_type])
    if context_lines > 0:
        cmd.extend(["-C", str(context_lines)])
    cmd.extend(["--", pattern, str(repo)])

    try:
        proc_result = subprocess.run(
            cmd,
            cwd=str(repo),
            capture_output=True,
            text=True,
            timeout=60,
            check=False,
        )
        # Cap captured output at 500 lines after the fact to bound response
        # size without risking SIGPIPE from a truncating shell pipe.
        stdout_lines = proc_result.stdout.split("\n")[:500]
        result = {
            # rg exits 1 for "no matches", which is not an error for us.
            "status": "success" if proc_result.returncode in (0, 1) else "error",
            "stdout": "\n".join(stdout_lines),
            "stderr": proc_result.stderr[:10000] if proc_result.stderr else "",
            "return_code": proc_result.returncode,
        }
    except subprocess.TimeoutExpired:
        result = {
            "status": "error",
            "stdout": "",
            "stderr": "Command timed out after 60 seconds",
            "return_code": -1,
        }
    except (subprocess.SubprocessError, OSError) as e:
        result = {
            "status": "error",
            "stdout": "",
            "stderr": str(e),
            "return_code": -1,
        }

    # Parse rg's JSON-lines output, keeping only "match" records.
    matches = []
    if result["stdout"]:
        for line in result["stdout"].strip().split("\n"):
            if not line:
                continue
            try:
                data = json.loads(line)
                if data.get("type") == "match":
                    match_data = data.get("data", {})
                    matches.append(
                        {
                            "path": match_data.get("path", {}).get("text", ""),
                            "line_number": match_data.get("line_number"),
                            "lines": match_data.get("lines", {}).get("text", ""),
                        },
                    )
            except json.JSONDecodeError:
                continue

    return json.dumps(
        {
            "status": "success" if result["return_code"] in (0, 1) else "error",  # rg returns 1 for no matches
            "pattern": pattern,
            "matches": matches,
            "match_count": len(matches),
        },
    )

write_file

write_file(file_path, content)

Write content to a file in the output directory.

USE THIS TOOL: - To write CONTEXT.md and other analysis output files - To save narrated context, summaries, or generated documentation - For any file that needs to be created or overwritten in .code-context/

DO NOT USE: - For writing file lists (use write_file_list instead) - For writing to paths outside the analysis output directory

Security: Only allows writing to paths within the .code-context/ output directory to prevent unintended modifications to the analyzed repository.

Parameters:

Name Type Description Default
file_path str

Absolute path to the file to write. Must be within a .code-context/ directory.

required
content str

String content to write to the file.

required

Returns:

Type Description
str

JSON with status, path, and bytes written.

Example

write_file("/repo/.code-context/CONTEXT.md", "# Project Context\n\n## Summary\n...")

Source code in src/code_context_agent/tools/discovery.py
@tool
def write_file(file_path: str, content: str) -> str:
    """Write content to a file in the output directory.

    USE THIS TOOL:
    - To write CONTEXT.md and other analysis output files
    - To save narrated context, summaries, or generated documentation
    - For any file that needs to be created or overwritten in .code-context/

    DO NOT USE:
    - For writing file lists (use write_file_list instead)
    - For writing to paths outside the analysis output directory

    Security: Only allows writing to paths within the .code-context/ output
    directory to prevent unintended modifications to the analyzed repository.

    Args:
        file_path: Absolute path to the file to write. Must be within a .code-context/ directory.
        content: String content to write to the file.

    Returns:
        JSON with status, path, and bytes written.

    Example:
        >>> write_file("/repo/.code-context/CONTEXT.md", "# Project Context\\n\\n## Summary\\n...")
    """
    try:
        path = validate_file_path(file_path, must_exist=False)
    except ValidationError as e:
        return json.dumps({"status": "error", "error": str(e)})

    # Security: only allow writes within a .code-context/ directory
    if ".code-context" not in path.parts:
        return json.dumps(
            {
                "status": "error",
                "error": f"Write denied: {path} is not within a .code-context/ directory",
            },
        )

    try:
        # mkdir can fail too (permissions, a file occupying the directory
        # name), so it belongs inside the same OSError guard as the write.
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(content, encoding="utf-8")
    except OSError as e:
        return json.dumps({"status": "error", "error": str(e)})

    # len(content) counts characters, not bytes; encode to report the
    # actual UTF-8 size written to disk.
    bytes_written = len(content.encode("utf-8"))
    logger.info(f"Wrote {bytes_written} bytes to {path}")

    return json.dumps(
        {
            "status": "success",
            "path": str(path),
            "bytes_written": bytes_written,
        },
    )

write_file_list

write_file_list(file_paths, output_path)

Write a list of file paths to a file for repomix bundling.

Use this to create the curated file list before calling repomix_bundle.

Parameters:

Name Type Description Default
file_paths list[str]

List of file paths to include in the bundle.

required
output_path str

Path to write the file list.

required

Returns:

Type Description
str

JSON with output path and file count.

Example

result = write_file_list(["src/main.ts", "src/utils.ts"], ".code-context/files.targeted.txt")

Source code in src/code_context_agent/tools/discovery.py
@tool
def write_file_list(file_paths: list[str], output_path: str) -> str:
    """Write a list of file paths to a file for repomix bundling.

    Use this to create the curated file list before calling repomix_bundle.

    Args:
        file_paths: List of file paths to include in the bundle.
        output_path: Path to write the file list.

    Returns:
        JSON with output path and file count.

    Example:
        >>> result = write_file_list(["src/main.ts", "src/utils.ts"], ".code-context/files.targeted.txt")
    """
    try:
        output = validate_file_path(output_path, must_exist=False)
    except ValidationError as e:
        return json.dumps({"status": "error", "error": str(e)})

    # Deduplicate and sort so the list is stable and reproducible.
    unique_paths = sorted(set(file_paths))

    try:
        output.parent.mkdir(parents=True, exist_ok=True)
        # Explicit UTF-8: paths may contain non-ASCII characters and the
        # platform default encoding is not guaranteed to handle them.
        # OSError is caught for consistency with write_file's error handling.
        with output.open("w", encoding="utf-8") as f:
            for path in unique_paths:
                f.write(f"{path}\n")
    except OSError as e:
        return json.dumps({"status": "error", "error": str(e)})

    logger.info(f"Wrote {len(unique_paths)} paths to {output}")

    return json.dumps(
        {
            "status": "success",
            "output_path": str(output),
            "file_count": len(unique_paths),
        },
    )

git_blame_summary

git_blame_summary(repo_path, file_path)

Get authorship summary for a file.

USE THIS TOOL: - To identify who has expertise on a file - To understand code ownership distribution - To find the right person to ask about code - To see how recently different parts were modified

DO NOT USE: - For files not tracked by git - When you need line-by-line attribution (use git blame directly)

Provides a summary of who wrote which portions of a file, aggregated by author.

Parameters:

Name Type Description Default
repo_path str

Absolute path to the repository root.

required
file_path str

Path to the file (relative to repo root or absolute).

required

Returns:

Type Description
str

JSON with author breakdown by lines owned.

Output Size: ~100 bytes per author.

Example success

{"status": "success", "file_path": "src/main.py", "total_lines": 150, "authors": [{"email": "dev@example.com", "lines": 100, "percentage": 66.7, "last_commit_date": "2024-01-15"}]}

Source code in src/code_context_agent/tools/git.py
@tool
def git_blame_summary(
    repo_path: str,
    file_path: str,
) -> str:
    """Get authorship summary for a file.

    USE THIS TOOL:
    - To identify who has expertise on a file
    - To understand code ownership distribution
    - To find the right person to ask about code
    - To see how recently different parts were modified

    DO NOT USE:
    - For files not tracked by git
    - When you need line-by-line attribution (use git blame directly)

    Provides a summary of who wrote which portions of a file,
    aggregated by author.

    Args:
        repo_path: Absolute path to the repository root.
        file_path: Path to the file (relative to repo root or absolute).

    Returns:
        JSON with author breakdown by lines owned.

    Output Size: ~100 bytes per author.

    Example success:
        {"status": "success", "file_path": "src/main.py", "total_lines": 150,
         "authors": [{"email": "dev@example.com", "lines": 100, "percentage": 66.7,
                      "last_commit_date": "2024-01-15"}]}
    """
    try:
        repo = validate_repo_path(repo_path)
    except ValidationError as e:
        return ToolResult.error(str(e)).to_json()

    # Rewrite an absolute path as repo-relative before handing it to git.
    candidate = Path(file_path)
    if candidate.is_absolute():
        try:
            file_path = str(candidate.relative_to(repo))
        except ValueError:
            return ToolResult.error(f"File path {file_path} is not within repo {repo}").to_json()

    # Porcelain blame output carries author email and date per entry.
    result = run_command(
        ["git", "blame", "--line-porcelain", file_path],
        cwd=str(repo),
        timeout=60,
    )

    if result["status"] != "success":
        return ToolResult.error(f"Failed to get blame: {result['stderr']}").to_json()

    # Walk the porcelain stream; a tab-prefixed content line closes one
    # blame entry attributed to the most recently parsed author/date.
    stats: dict[str, dict[str, Any]] = {}
    line_total = 0
    author = ""
    date = ""

    for raw in result["stdout"].split("\n"):
        author, date = _parse_blame_line(raw, author, date)
        if not raw.startswith("\t") or not author:
            continue
        line_total += 1
        record = stats.setdefault(author, {"lines": 0, "last_date": ""})
        record["lines"] += 1
        if date > record["last_date"]:
            record["last_date"] = date

    # Rank authors by lines owned, most first.
    ranked = sorted(stats.items(), key=lambda kv: kv[1]["lines"], reverse=True)
    authors = []
    for email, data in ranked:
        share = round(100 * data["lines"] / line_total, 1) if line_total > 0 else 0
        authors.append(
            {
                "email": email,
                "lines": data["lines"],
                "percentage": share,
                "last_commit_date": data["last_date"],
            },
        )

    return ToolResult.success(
        file_path=file_path,
        total_lines=line_total,
        authors=authors,
        author_count=len(authors),
    ).to_json()

git_contributors

git_contributors(repo_path, limit=100)

Get contributor statistics for the repository.

USE THIS TOOL: - To identify key contributors and their areas of focus - To understand team structure and expertise distribution - To find domain experts for specific areas

DO NOT USE: - When you only need file-specific authorship (use git_blame_summary instead)

Parameters:

Name Type Description Default
repo_path str

Absolute path to the repository root.

required
limit int

Maximum commits to analyze (default 100).

100

Returns:

Type Description
str

JSON with contributors ranked by commit count.

Output Size: ~100 bytes per contributor.

Example success

{"status": "success", "contributors": [ {"email": "dev1@example.com", "commits": 50, "percentage": 50.0, "first_commit": "2023-06-01", "last_commit": "2024-01-15"} ], "total_commits": 100}

Source code in src/code_context_agent/tools/git.py
@tool
def git_contributors(
    repo_path: str,
    limit: int = 100,
) -> str:
    """Get contributor statistics for the repository.

    USE THIS TOOL:
    - To identify key contributors and their areas of focus
    - To understand team structure and expertise distribution
    - To find domain experts for specific areas

    DO NOT USE:
    - When you only need file-specific authorship (use git_blame_summary instead)

    Args:
        repo_path: Absolute path to the repository root.
        limit: Maximum commits to analyze (default 100).

    Returns:
        JSON with contributors ranked by commit count.

    Output Size: ~100 bytes per contributor.

    Example success:
        {"status": "success", "contributors": [
            {"email": "dev1@example.com", "commits": 50, "percentage": 50.0,
             "first_commit": "2023-06-01", "last_commit": "2024-01-15"}
        ], "total_commits": 100}
    """
    try:
        repo = validate_repo_path(repo_path)
    except ValidationError as e:
        return ToolResult.error(str(e)).to_json()

    # One "email|date" record per commit.
    result = run_command(
        ["git", "log", f"-n{limit}", "--pretty=format:%ae|%as"],
        cwd=str(repo),
    )

    if result["status"] != "success":
        return ToolResult.error(f"Failed to get contributors: {result['stderr']}").to_json()

    # Aggregate commit count and first/last commit dates per author email.
    stats: dict[str, dict[str, Any]] = {}
    commit_total = 0

    for raw in result["stdout"].strip().split("\n"):
        if not raw.strip():
            continue
        pieces = raw.split("|", 1)
        if len(pieces) < 2:
            continue
        email, date = pieces[0], pieces[1]
        commit_total += 1
        record = stats.setdefault(email, {"commits": 0, "first_date": date, "last_date": date})
        record["commits"] += 1
        # Dates are ISO (%as), so string comparison orders chronologically.
        if date < record["first_date"]:
            record["first_date"] = date
        if date > record["last_date"]:
            record["last_date"] = date

    contributors = []
    for email, data in sorted(stats.items(), key=lambda kv: kv[1]["commits"], reverse=True):
        share = round(100 * data["commits"] / commit_total, 1) if commit_total > 0 else 0
        contributors.append(
            {
                "email": email,
                "commits": data["commits"],
                "percentage": share,
                "first_commit": data["first_date"],
                "last_commit": data["last_date"],
            },
        )

    return ToolResult.success(
        contributors=contributors,
        total_commits=commit_total,
        contributor_count=len(contributors),
    ).to_json()

git_diff_file

git_diff_file(
    repo_path, file_path, commit=None, context_lines=3
)

Get the diff for a specific file.

USE THIS TOOL: - To see exact changes in a file - To understand what changed between commits - For code review or change analysis - To investigate recent modifications

DO NOT USE: - For large binary files - When you need full file content (use read_file_bounded instead)

Shows the unified diff for a file. Without a commit, shows unstaged changes. With a commit hash, shows changes introduced by that commit.

Parameters:

Name Type Description Default
repo_path str

Absolute path to the repository root.

required
file_path str

Path to the file (relative to repo root or absolute).

required
commit str | None

Optional commit hash to show changes from that commit.

None
context_lines int

Lines of context around changes (default 3).

3

Returns:

Type Description
str

JSON with diff content and metadata.

Output Size: Varies by change size, typically 1-10KB.

Example success

{"status": "success", "file_path": "src/main.py", "commit": "abc123", "diff": "@@ -10,5 +10,7 @@..."}

Source code in src/code_context_agent/tools/git.py
@tool
def git_diff_file(
    repo_path: str,
    file_path: str,
    commit: str | None = None,
    context_lines: int = 3,
) -> str:
    """Get the diff for a specific file.

    USE THIS TOOL:
    - To see exact changes in a file
    - To understand what changed between commits
    - For code review or change analysis
    - To investigate recent modifications

    DO NOT USE:
    - For large binary files
    - When you need full file content (use read_file_bounded instead)

    Shows the unified diff for a file. Without a commit, shows unstaged changes.
    With a commit hash, shows changes introduced by that commit.

    Args:
        repo_path: Absolute path to the repository root.
        file_path: Path to the file (relative to repo root or absolute).
        commit: Optional commit hash to show changes from that commit.
        context_lines: Lines of context around changes (default 3).

    Returns:
        JSON with diff content and metadata.

    Output Size: Varies by change size, typically 1-10KB.

    Example success:
        {"status": "success", "file_path": "src/main.py",
         "commit": "abc123", "diff": "@@ -10,5 +10,7 @@..."}
    """
    try:
        repo = validate_repo_path(repo_path)
    except ValidationError as e:
        return ToolResult.error(str(e)).to_json()

    # Normalize file path
    if Path(file_path).is_absolute():
        try:
            file_path = str(Path(file_path).relative_to(repo))
        except ValueError:
            return ToolResult.error(f"File path {file_path} is not within repo {repo}").to_json()

    if commit:
        # Show diff for specific commit
        cmd = [
            "git",
            "show",
            f"-U{context_lines}",
            "--pretty=format:",
            commit,
            "--",
            file_path,
        ]
    else:
        # Show unstaged changes
        cmd = [
            "git",
            "diff",
            f"-U{context_lines}",
            "--",
            file_path,
        ]

    result = run_command(cmd, cwd=str(repo), max_output=50_000)

    if result["status"] != "success":
        return ToolResult.error(f"Failed to get diff: {result['stderr']}").to_json()

    diff_content = result["stdout"].strip()

    if not diff_content:
        return ToolResult.success(
            file_path=file_path,
            commit=commit,
            diff="",
            note="No changes found",
        ).to_json()

    # Parse diff stats. startswith checks (rather than regexes like ^\+[^+]
    # and ^-[^-]) correctly count added/removed blank lines ("+" or "-"
    # alone) and removed lines whose content begins with "-" (rendered as
    # "--..." in the diff), while still excluding the "+++"/"---" file
    # headers.
    diff_lines = diff_content.splitlines()
    additions = sum(1 for line in diff_lines if line.startswith("+") and not line.startswith("+++"))
    deletions = sum(1 for line in diff_lines if line.startswith("-") and not line.startswith("---"))

    return ToolResult.success(
        file_path=file_path,
        commit=commit,
        diff=diff_content,
        additions=additions,
        deletions=deletions,
        truncated=result.get("truncated", False),
    ).to_json()

git_file_history

git_file_history(repo_path, file_path, limit=20)

Get commit history for a specific file.

USE THIS TOOL: - To understand how a file has evolved over time - To find when specific changes were introduced - To identify who has worked on a file - To trace the intent behind changes via commit messages

DO NOT USE: - For repository-wide history (use git_recent_commits instead) - For files not yet tracked by git

Returns recent commits that touched the specified file, including commit messages which often explain the "why" behind changes.

Parameters:

Name Type Description Default
repo_path str

Absolute path to the repository root.

required
file_path str

Path to the file (relative to repo root or absolute).

required
limit int

Maximum commits to return (default 20).

20

Returns:

Type Description
str

JSON with commits array containing hash, author, date, and message.

Output Size: ~200 bytes per commit.

Example success

{"status": "success", "file_path": "src/main.py", "commits": [{"hash": "abc123", "author": "dev@example.com", "date": "2024-01-15", "message": "Fix auth bug"}]}

Source code in src/code_context_agent/tools/git.py
@tool
def git_file_history(
    repo_path: str,
    file_path: str,
    limit: int = 20,
) -> str:
    """Get commit history for a specific file.

    USE THIS TOOL:
    - To understand how a file has evolved over time
    - To find when specific changes were introduced
    - To identify who has worked on a file
    - To trace the intent behind changes via commit messages

    DO NOT USE:
    - For repository-wide history (use git_recent_commits instead)
    - For files not yet tracked by git

    Returns recent commits that touched the specified file, including
    commit messages which often explain the "why" behind changes.

    Args:
        repo_path: Absolute path to the repository root.
        file_path: Path to the file (relative to repo root or absolute).
        limit: Maximum commits to return (default 20).

    Returns:
        JSON with commits array containing hash, author, date, and message.

    Output Size: ~200 bytes per commit.

    Example success:
        {"status": "success", "file_path": "src/main.py",
         "commits": [{"hash": "abc123", "author": "dev@example.com",
                      "date": "2024-01-15", "message": "Fix auth bug"}]}
    """
    try:
        repo = validate_repo_path(repo_path)
    except ValidationError as e:
        return ToolResult.error(str(e)).to_json()

    # Rewrite an absolute path as repo-relative before handing it to git.
    candidate = Path(file_path)
    if candidate.is_absolute():
        try:
            file_path = str(candidate.relative_to(repo))
        except ValueError:
            return ToolResult.error(f"File path {file_path} is not within repo {repo}").to_json()

    result = run_command(
        ["git", "log", f"-n{limit}", "--pretty=format:%H|%ae|%as|%s", "--", file_path],
        cwd=str(repo),
    )

    if result["status"] != "success":
        return ToolResult.error(f"Failed to get history: {result['stderr']}").to_json()

    # Each record is "hash|email|date|subject"; the subject may itself
    # contain "|", so split at most three times.
    commits = []
    for entry in result["stdout"].strip().split("\n"):
        if not entry.strip():
            continue
        fields = entry.split("|", 3)
        if len(fields) < 4:
            continue
        commit_hash, author, date, message = fields
        commits.append(
            {
                "hash": commit_hash,
                "author": author,
                "date": date,
                "message": message,
            },
        )

    return ToolResult.success(
        file_path=file_path,
        commits=commits,
        commit_count=len(commits),
    ).to_json()

git_files_changed_together

git_files_changed_together(repo_path, file_path, limit=100)

Find files that frequently change together with a given file (coupling detection).

USE THIS TOOL: - To identify tightly coupled files that may need to change together - To understand implicit dependencies not captured by imports - To find related files when making changes - To detect architectural coupling patterns

DO NOT USE: - For untracked files (not yet in git) - For files with no commit history

Analyzes git history to find files that appear in the same commits as the target file, ranked by co-occurrence frequency.

Parameters:

Name Type Description Default
repo_path str

Absolute path to the repository root.

required
file_path str

Path to the file (relative to repo root or absolute).

required
limit int

Maximum number of commits to analyze (default 100).

100

Returns:

Type Description
str

JSON with:
  • cochanged_files: List of {path, count, percentage} sorted by frequency
  • total_commits: Number of commits analyzed
  • file_path: The analyzed file

Output Size: ~100 bytes per co-changed file.

Example success

{"status": "success", "file_path": "src/auth.py", "total_commits": 45, "cochanged_files": [{"path": "src/user.py", "count": 20, "percentage": 44.4}, ...]}

Example patterns detected
  • High coupling (>50%): Files should possibly be merged or abstracted
  • Medium coupling (20-50%): Normal feature-level coupling
  • Low coupling (<20%): Incidental changes, less significant
Source code in src/code_context_agent/tools/git.py
@tool
def git_files_changed_together(
    repo_path: str,
    file_path: str,
    limit: int = 100,
) -> str:
    """Find files that frequently change together with a given file (coupling detection).

    USE THIS TOOL:
    - To identify tightly coupled files that may need to change together
    - To understand implicit dependencies not captured by imports
    - To find related files when making changes
    - To detect architectural coupling patterns

    DO NOT USE:
    - For untracked files (not yet in git)
    - For files with no commit history

    Analyzes git history to find files that appear in the same commits
    as the target file, ranked by co-occurrence frequency.

    Args:
        repo_path: Absolute path to the repository root.
        file_path: Path to the file (relative to repo root or absolute).
        limit: Maximum number of commits to analyze (default 100).

    Returns:
        JSON with:
        - cochanged_files: List of {path, count, percentage} sorted by frequency
        - total_commits: Number of commits analyzed
        - file_path: The analyzed file

    Output Size: ~100 bytes per co-changed file.

    Example success:
        {"status": "success", "file_path": "src/auth.py", "total_commits": 45,
         "cochanged_files": [{"path": "src/user.py", "count": 20, "percentage": 44.4}, ...]}

    Example patterns detected:
        - High coupling (>50%): Files should possibly be merged or abstracted
        - Medium coupling (20-50%): Normal feature-level coupling
        - Low coupling (<20%): Incidental changes, less significant
    """
    try:
        repo = validate_repo_path(repo_path)
    except ValidationError as e:
        return ToolResult.error(str(e)).to_json()

    # Rewrite an absolute path as repo-relative before handing it to git.
    candidate = Path(file_path)
    if candidate.is_absolute():
        try:
            file_path = str(candidate.relative_to(repo))
        except ValueError:
            return ToolResult.error(f"File path {file_path} is not within repo {repo}").to_json()

    # Step 1: collect the hashes of commits that touched the target file.
    commits_result = run_command(
        ["git", "log", f"-n{limit}", "--pretty=format:%H", "--", file_path],
        cwd=str(repo),
    )

    if commits_result["status"] != "success":
        return ToolResult.error(
            f"Failed to get commits: {commits_result['stderr']}",
        ).to_json()

    commit_hashes = [
        token.strip()
        for token in commits_result["stdout"].strip().split("\n")
        if token.strip()
    ]

    if not commit_hashes:
        return ToolResult.success(
            file_path=file_path,
            total_commits=0,
            cochanged_files=[],
            note="No commits found for this file",
        ).to_json()

    # Step 2: for each such commit, tally every other file it touched.
    tally: Counter[str] = Counter()
    total_commits = len(commit_hashes)

    for commit_hash in commit_hashes:
        show_result = run_command(
            ["git", "show", "--pretty=format:", "--name-only", commit_hash],
            cwd=str(repo),
            timeout=30,
        )
        if show_result["status"] != "success":
            continue
        tally.update(
            name.strip()
            for name in show_result["stdout"].strip().split("\n")
            if name.strip() and name.strip() != file_path
        )

    # Step 3: rank the top co-changed files by frequency.
    cochanged_files = [
        {
            "path": path,
            "count": count,
            "percentage": round(100 * count / total_commits, 1),
        }
        for path, count in tally.most_common(50)
    ]

    logger.info(f"Found {len(cochanged_files)} co-changed files for {file_path}")

    return ToolResult.success(
        file_path=file_path,
        total_commits=total_commits,
        cochanged_files=cochanged_files,
    ).to_json()

git_hotspots

git_hotspots(repo_path, limit=50, since=None)

Identify frequently changed files (change hotspots).

USE THIS TOOL: - To find areas of high activity/churn - To identify potentially problematic code (frequent changes may indicate bugs) - To prioritize code review or refactoring efforts - To understand where development effort is concentrated

DO NOT USE: - For small repositories with little history

Analyzes git history to find files with the most commits, which often indicates areas of active development or instability.

Parameters:

Name Type Description Default
repo_path str

Absolute path to the repository root.

required
limit int

Maximum commits to analyze (default 50).

50
since str | None

Optional date filter (e.g., "2024-01-01", "6 months ago").

None

Returns:

Type Description
str

JSON with hotspots ranked by commit frequency.

Output Size: ~80 bytes per file.

Example success

{"status": "success", "hotspots": [ {"path": "src/auth.py", "commits": 25, "percentage": 50.0}, {"path": "src/api.py", "commits": 15, "percentage": 30.0} ], "total_commits_analyzed": 50}

Interpretation
  • High commit files may need: better tests, refactoring, or documentation
  • Stable files (few commits) are often mature/well-designed
Source code in src/code_context_agent/tools/git.py
@tool
def git_hotspots(
    repo_path: str,
    limit: int = 50,
    since: str | None = None,
) -> str:
    """Identify frequently changed files (change hotspots).

    USE THIS TOOL:
    - To find areas of high activity/churn
    - To identify potentially problematic code (frequent changes may indicate bugs)
    - To prioritize code review or refactoring efforts
    - To understand where development effort is concentrated

    DO NOT USE:
    - For small repositories with little history

    Analyzes git history to find files with the most commits,
    which often indicates areas of active development or instability.

    Args:
        repo_path: Absolute path to the repository root.
        limit: Maximum commits to analyze (default 50).
        since: Optional date filter (e.g., "2024-01-01", "6 months ago").

    Returns:
        JSON with hotspots ranked by commit frequency.

    Output Size: ~80 bytes per file.

    Example success:
        {"status": "success", "hotspots": [
            {"path": "src/auth.py", "commits": 25, "percentage": 50.0},
            {"path": "src/api.py", "commits": 15, "percentage": 30.0}
        ], "total_commits_analyzed": 50}

    Interpretation:
        - High commit files may need: better tests, refactoring, or documentation
        - Stable files (few commits) are often mature/well-designed
    """
    try:
        repo = validate_repo_path(repo_path)
    except ValidationError as e:
        return ToolResult.error(str(e)).to_json()

    # Empty pretty format + --name-only yields blank-line-separated groups
    # of file names, one group per commit.
    cmd = ["git", "log", f"-n{limit}", "--pretty=format:", "--name-only"]
    if since:
        cmd.extend(["--since", since])

    result = run_command(cmd, cwd=str(repo), timeout=60)

    if result["status"] != "success":
        return ToolResult.error(f"Failed to analyze history: {result['stderr']}").to_json()

    # Tally file occurrences; a blank line closes the current commit group.
    tally: Counter[str] = Counter()
    commit_count = 0
    group_open = False

    for entry in result["stdout"].strip().split("\n"):
        name = entry.strip()
        if name:
            group_open = True
            tally[name] += 1
        else:
            if group_open:
                commit_count += 1
            group_open = False

    # Close the final group if the output did not end with a blank line.
    if group_open:
        commit_count += 1

    hotspots = [
        {
            "path": path,
            "commits": count,
            "percentage": round(100 * count / commit_count, 1) if commit_count > 0 else 0,
        }
        for path, count in tally.most_common(30)
    ]

    logger.info(f"Found {len(hotspots)} hotspot files from {commit_count} commits")

    return ToolResult.success(
        hotspots=hotspots,
        total_commits_analyzed=commit_count,
        unique_files=len(tally),
    ).to_json()

git_recent_commits

git_recent_commits(repo_path, limit=30, branch='HEAD')

Get recent commits from the repository.

USE THIS TOOL: - To understand recent development activity - To identify active areas of the codebase - To see the general direction of development - To find commits relevant to a feature or bug

DO NOT USE: - For file-specific history (use git_file_history instead)

Returns recent commits from the specified branch with messages that provide context about development activity.

Parameters:

Name Type Description Default
repo_path str

Absolute path to the repository root.

required
limit int

Maximum commits to return (default 30).

30
branch str

Branch or ref to query (default HEAD).

'HEAD'

Returns:

Type Description
str

JSON with commits array containing hash, author, date, message, and files_changed count.

Output Size: ~250 bytes per commit.

Example success

{"status": "success", "branch": "main", "commits": [{"hash": "abc123", "author": "dev@example.com", "date": "2024-01-15", "message": "Add feature X", "files_changed": 5}]}

Source code in src/code_context_agent/tools/git.py
@tool
def git_recent_commits(
    repo_path: str,
    limit: int = 30,
    branch: str = "HEAD",
) -> str:
    """List the most recent commits on a branch.

    USE THIS TOOL:
    - To understand recent development activity
    - To identify active areas of the codebase
    - To see the general direction of development
    - To find commits relevant to a feature or bug

    DO NOT USE:
    - For file-specific history (use git_file_history instead)

    Each returned commit carries its message, giving context about
    what has been worked on recently.

    Args:
        repo_path: Absolute path to the repository root.
        limit: Maximum commits to return (default 30).
        branch: Branch or ref to query (default HEAD).

    Returns:
        JSON with commits array containing hash, author, date, message,
        and files_changed count.

    Output Size: ~250 bytes per commit.

    Example success:
        {"status": "success", "branch": "main",
         "commits": [{"hash": "abc123", "author": "dev@example.com",
                      "date": "2024-01-15", "message": "Add feature X",
                      "files_changed": 5}]}
    """
    try:
        repo = validate_repo_path(repo_path)
    except ValidationError as e:
        return ToolResult.error(str(e)).to_json()

    # Pipe-delimited commit header followed by an optional --shortstat line.
    cmd = [
        "git",
        "log",
        f"-n{limit}",
        "--pretty=format:%H|%ae|%as|%s",
        "--shortstat",
        branch,
    ]

    result = run_command(cmd, cwd=str(repo))

    if result["status"] != "success":
        return ToolResult.error(f"Failed to get commits: {result['stderr']}").to_json()

    raw_lines = result["stdout"].strip().split("\n")
    total = len(raw_lines)
    commits = []
    idx = 0

    while idx < total:
        entry = raw_lines[idx].strip()
        idx += 1
        # Skip blanks and anything that isn't a pipe-delimited commit header.
        if not entry or "|" not in entry:
            continue
        # maxsplit=3 keeps pipes that appear inside the commit message.
        fields = entry.split("|", 3)
        if len(fields) < 4:
            continue
        record = {
            "hash": fields[0],
            "author": fields[1],
            "date": fields[2],
            "message": fields[3],
            "files_changed": 0,
        }
        # The shortstat line (if present) directly follows the header:
        # "X files changed, Y insertions(+), Z deletions(-)"
        if idx < total:
            stats = raw_lines[idx].strip()
            stat_match = re.search(r"(\d+) files? changed", stats)
            if stat_match:
                record["files_changed"] = int(stat_match.group(1))
                idx += 1  # Consume the stat line so it isn't re-parsed
        commits.append(record)

    return ToolResult.success(
        branch=branch,
        commits=commits,
        commit_count=len(commits),
    ).to_json()

code_graph_analyze

code_graph_analyze(
    graph_id,
    analysis_type,
    top_k=10,
    node_a="",
    node_b="",
    resolution=1.0,
    category="",
)

Run graph algorithms to surface structural insights about the codebase.

USE THIS TOOL: - After populating graph with code_graph_ingest_* tools - To find important code that isn't obvious from file names - To understand code relationships and architecture

DO NOT USE: - On an empty graph (ingest data first) - For simple lookups (use code_graph_explore instead)

Analysis types provide different perspectives:

Centrality (finds important code): - "hotspots": Betweenness centrality. Finds bottleneck code that many paths go through. High score = integration point, likely to cause cascading changes. Use for: risk assessment, refactoring targets. - "foundations": PageRank. Finds core infrastructure that other important code depends on. High score = foundational code. Use for: understanding dependencies, documentation priority. - "entry_points": Nodes with no incoming edges but outgoing calls. These start execution flows. Use for: understanding app structure.

Clustering (finds groupings): - "modules": Louvain community detection. Finds densely connected groups = logical modules/layers. Use for: architecture diagrams, understanding boundaries.

Relationships (between specific nodes): - "coupling": Measures how tightly two nodes are connected. Use for: understanding change impact, identifying tight coupling. - "similar": Personalized PageRank from a node. Finds related code. Use for: understanding a node's neighborhood. - "dependencies": BFS from a node. Shows what it depends on. Use for: understanding impact of changes.

Filtering: - "category": Finds all nodes in a business logic category. Use for: focused analysis on db/auth/validation/etc.

Code Health: - "unused_symbols": Finds functions/classes/methods with zero cross-file references. Dead code candidates. Use category param for node type filter. - "refactoring": Combines clone detection, code smells, and unused symbols into ranked refactoring opportunities.

Parameters:

Name Type Description Default
graph_id str

ID of the graph to analyze (must have data from ingestion)

required
analysis_type str

Algorithm to run. One of: - "hotspots": Returns ranked list by betweenness score - "foundations": Returns ranked list by PageRank score - "entry_points": Returns list of entry point nodes - "modules": Returns list of detected modules with members - "coupling": Returns coupling metrics (requires node_a, node_b) - "similar": Returns similar nodes (requires node_a) - "category": Returns nodes in category (requires category) - "dependencies": Returns dependency chain (requires node_a) - "trust": TrustRank-based foundations (noise-resistant PageRank from entry points) - "triangles": Find tightly-coupled code triads - "unused_symbols": Dead code detection (zero cross-file references) - "refactoring": Combined refactoring opportunity ranking

required
top_k int

Maximum results for ranked analyses (hotspots, foundations, similar). Default 10. Use 20-30 for comprehensive analysis.

10
node_a str

Required for "coupling", "similar", "dependencies". Node ID format: "file_path:symbol_name"

''
node_b str

Required for "coupling" analysis. Second node to compare.

''
resolution float

For "modules" only. Controls cluster granularity: - < 1.0: Fewer, larger clusters (e.g., 0.5 for high-level layers) - = 1.0: Default clustering - > 1.0: More, smaller clusters (e.g., 1.5 for fine-grained)

1.0
category str

Required for "category" analysis. Category name from AST-grep rule packs: "db", "auth", "http", "validation", etc.

''

Returns:

Name Type Description
str

JSON with analysis results. Format varies by type:

str

hotspots/foundations:

str

{"results": [{"id": "...", "score": 0.85, "name": "...", ...}]}

modules:

{"module_count": 5, "results": [{"module_id": 0, "size": 15, "key_nodes": [...], "cohesion": 0.8}]}

coupling:

{"results": {"coupling": 2.5, "shared_neighbors": 3, "path_length": 2}}

Output Size: 1-10KB depending on top_k and analysis type

Workflow Examples:

Find bottleneck code

hotspots = code_graph_analyze("main", "hotspots", top_k=15)

Results ranked by betweenness - top items are integration points

Detect architecture layers

modules = code_graph_analyze("main", "modules", resolution=0.8)

Each module is a logical grouping - name based on key_nodes

Understand coupling

coupling = code_graph_analyze("main", "coupling", node_a="src/api.py:handler", node_b="src/db.py:repository")

High coupling score = tightly connected, changes propagate

Find all database operations

db_ops = code_graph_analyze("main", "category", category="db")

Returns all nodes tagged as "db" from AST-grep ingestion

Source code in src/code_context_agent/tools/graph/tools.py
@tool
def code_graph_analyze(  # noqa: C901
    graph_id: str,
    analysis_type: str,
    top_k: int = 10,
    node_a: str = "",
    node_b: str = "",
    resolution: float = 1.0,
    category: str = "",
) -> str:
    """Run graph algorithms to surface structural insights about the codebase.

    USE THIS TOOL:
    - After populating graph with code_graph_ingest_* tools
    - To find important code that isn't obvious from file names
    - To understand code relationships and architecture

    DO NOT USE:
    - On an empty graph (ingest data first)
    - For simple lookups (use code_graph_explore instead)

    Each analysis type offers a different lens on the graph:

    **Centrality (finds important code):**
    - "hotspots": Betweenness centrality — bottleneck code many paths
      cross. High score = integration point; changes ripple widely.
      Use for: risk assessment, refactoring targets.
    - "foundations": PageRank — core infrastructure that other important
      code depends on. Use for: dependency understanding, doc priority.
    - "entry_points": Nodes with outgoing calls but no incoming edges;
      these start execution flows. Use for: understanding app structure.

    **Clustering (finds groupings):**
    - "modules": Louvain community detection — densely connected groups
      that form logical modules/layers. Use for: architecture diagrams.

    **Relationships (between specific nodes):**
    - "coupling": How tightly two nodes are connected. Use for: change
      impact, identifying tight coupling.
    - "similar": Personalized PageRank from a node — related code.
    - "dependencies": BFS from a node — what it depends on.

    **Filtering:**
    - "category": All nodes in a business logic category
      (db/auth/validation/etc.).

    **Code Health:**
    - "unused_symbols": Symbols with zero cross-file references —
      dead code candidates. Use the category param as a node type filter.
    - "refactoring": Clone detection + code smells + unused symbols,
      merged into ranked refactoring opportunities.

    Args:
        graph_id: ID of the graph to analyze (must have data from ingestion)
        analysis_type: Algorithm to run. One of:
            - "hotspots": Ranked list by betweenness score
            - "foundations": Ranked list by PageRank score
            - "entry_points": List of entry point nodes
            - "modules": Detected modules with members
            - "coupling": Coupling metrics (requires node_a, node_b)
            - "similar": Similar nodes (requires node_a)
            - "category": Nodes in category (requires category)
            - "dependencies": Dependency chain (requires node_a)
            - "trust": TrustRank-based foundations (noise-resistant PageRank from entry points)
            - "triangles": Tightly-coupled code triads
            - "unused_symbols": Dead code detection (zero cross-file references)
            - "refactoring": Combined refactoring opportunity ranking
        top_k: Maximum results for ranked analyses (hotspots, foundations,
            similar). Default 10. Use 20-30 for comprehensive analysis.
        node_a: Required for "coupling", "similar", "dependencies".
            Node ID format: "file_path:symbol_name"
        node_b: Required for "coupling" analysis. Second node to compare.
        resolution: For "modules" only. Cluster granularity:
            - < 1.0: Fewer, larger clusters (e.g., 0.5 for high-level layers)
            - = 1.0: Default clustering
            - > 1.0: More, smaller clusters (e.g., 1.5 for fine-grained)
        category: Required for "category" analysis. Category name from
            AST-grep rule packs: "db", "auth", "http", "validation", etc.

    Returns:
        JSON with analysis results. Format varies by type:

        hotspots/foundations:
        {"results": [{"id": "...", "score": 0.85, "name": "...", ...}]}

        modules:
        {"module_count": 5, "results": [
            {"module_id": 0, "size": 15, "key_nodes": [...], "cohesion": 0.8}
        ]}

        coupling:
        {"results": {"coupling": 2.5, "shared_neighbors": 3, "path_length": 2}}

    Output Size: 1-10KB depending on top_k and analysis type

    Workflow Examples:

    Find bottleneck code:
        hotspots = code_graph_analyze("main", "hotspots", top_k=15)

    Detect architecture layers:
        modules = code_graph_analyze("main", "modules", resolution=0.8)

    Understand coupling:
        coupling = code_graph_analyze("main", "coupling",
                                       node_a="src/api.py:handler",
                                       node_b="src/db.py:repository")

    Find all database operations:
        db_ops = code_graph_analyze("main", "category", category="db")
    """
    graph = _get_graph(graph_id)
    if graph is None:
        return _json_response({"status": "error", "message": f"Graph not found: {graph_id}"})

    analyzer = CodeAnalyzer(graph)

    def _success(**fields):
        # Standard success envelope; field order mirrors call order.
        return _json_response({"status": "success", **fields})

    def _fail(message):
        return _json_response({"status": "error", "message": message})

    if analysis_type == "hotspots":
        return _success(analysis="hotspots", results=analyzer.find_hotspots(top_k))

    if analysis_type == "foundations":
        return _success(analysis="foundations", results=analyzer.find_foundations(top_k))

    if analysis_type == "entry_points":
        return _success(analysis="entry_points", results=analyzer.find_entry_points())

    if analysis_type == "modules":
        detected = analyzer.detect_modules(resolution)
        return _success(analysis="modules", module_count=len(detected), results=detected)

    if analysis_type == "coupling":
        if not node_a or not node_b:
            return _fail("node_a and node_b required for coupling")
        return _success(analysis="coupling", results=analyzer.calculate_coupling(node_a, node_b))

    if analysis_type == "similar":
        if not node_a:
            return _fail("node_a required for similar analysis")
        return _success(analysis="similar", results=analyzer.get_similar_nodes(node_a, top_k))

    if analysis_type == "category":
        if not category:
            return _fail("category required for category analysis")
        return _success(
            analysis="category",
            category=category,
            results=analyzer.find_clusters_by_category(category),
        )

    if analysis_type == "dependencies":
        if not node_a:
            return _fail("node_a required for dependencies")
        # "outgoing" direction: what does this node depend on
        return _success(analysis="dependencies", results=analyzer.get_dependency_chain(node_a, "outgoing"))

    if analysis_type == "trust":
        return _success(analysis="trust", results=analyzer.find_trusted_foundations(top_k=top_k))

    if analysis_type == "triangles":
        return _success(analysis="triangles", results=analyzer.find_triangles(top_k=top_k))

    if analysis_type == "unused_symbols":
        # category doubles as a comma-separated node-type filter here.
        type_filter = category.split(",") if category else None
        found = analyzer.find_unused_symbols(node_types=type_filter)
        return _success(analysis="unused_symbols", results=found, count=len(found))

    if analysis_type == "refactoring":
        candidates = analyzer.find_refactoring_candidates(top_k=top_k)
        return _success(analysis="refactoring", results=candidates, count=len(candidates))

    return _fail(f"Unknown analysis_type: {analysis_type}")

code_graph_create

code_graph_create(graph_id, description='')

Initialize an empty code graph for structural analysis of a codebase.

USE THIS TOOL: - At the start of analysis, BEFORE running LSP/AST-grep tools - When you need to unify results from multiple discovery tools - When you want to run graph algorithms (hotspots, modules, coupling)

DO NOT USE: - If a graph with this ID already exists (will overwrite it) - For simple single-file analysis (use LSP tools directly)

The graph is stored in memory for the session. Populate it using: - code_graph_ingest_lsp: Add symbols, references, definitions from LSP - code_graph_ingest_astgrep: Add business logic patterns - code_graph_ingest_tests: Add test coverage relationships

Parameters:

Name Type Description Default
graph_id str

Unique identifier for this graph. Use descriptive names: - "main": Primary analysis graph for the whole codebase - "feature_auth": Graph focused on authentication code - "module_api": Graph for API layer only

required
description str

Human-readable description of what this graph represents. Helps when managing multiple graphs.

''

Returns:

Name Type Description
JSON str

{"status": "success", "graph_id": "...", "message": "..."}

Output Size: ~100 bytes

Workflow
  1. code_graph_create("main") # Initialize
  2. lsp_start(...) + lsp_document_symbols(...) # Discover
  3. code_graph_ingest_lsp(...) # Populate
  4. code_graph_analyze("main", "hotspots") # Analyze
  5. code_graph_save("main", ".code-context/graph.json") # Persist
Example

code_graph_create("main", "Full codebase analysis") code_graph_create("backend", "Backend services only")

Source code in src/code_context_agent/tools/graph/tools.py
@tool
def code_graph_create(
    graph_id: str,
    description: str = "",
) -> str:
    """Initialize an empty code graph for structural analysis of a codebase.

    USE THIS TOOL:
    - At the start of analysis, BEFORE running LSP/AST-grep tools
    - When you need to unify results from multiple discovery tools
    - When you want to run graph algorithms (hotspots, modules, coupling)

    DO NOT USE:
    - If a graph with this ID already exists (will overwrite it)
    - For simple single-file analysis (use LSP tools directly)

    The graph lives in memory for the session. Populate it using:
    - code_graph_ingest_lsp: Add symbols, references, definitions from LSP
    - code_graph_ingest_astgrep: Add business logic patterns
    - code_graph_ingest_tests: Add test coverage relationships

    Args:
        graph_id: Unique identifier for this graph. Use descriptive names:
            - "main": Primary analysis graph for the whole codebase
            - "feature_auth": Graph focused on authentication code
            - "module_api": Graph for API layer only
        description: Human-readable description of what this graph
            represents. Helps when managing multiple graphs.

    Returns:
        JSON: {"status": "success", "graph_id": "...", "message": "..."}

    Output Size: ~100 bytes

    Workflow:
        1. code_graph_create("main")           # Initialize
        2. lsp_start(...) + lsp_document_symbols(...)  # Discover
        3. code_graph_ingest_lsp(...)          # Populate
        4. code_graph_analyze("main", "hotspots")  # Analyze
        5. code_graph_save("main", ".code-context/graph.json")  # Persist

    Example:
        code_graph_create("main", "Full codebase analysis")
        code_graph_create("backend", "Backend services only")
    """
    # Register a fresh empty graph, replacing any existing one under this ID.
    _graphs[graph_id] = CodeGraph()
    # Drop any explorer bound to the previous graph instance.
    _explorers.pop(graph_id, None)

    response = {
        "status": "success",
        "graph_id": graph_id,
        "description": description,
        "message": f"Created new code graph: {graph_id}",
    }
    return _json_response(response)

code_graph_explore

code_graph_explore(
    graph_id,
    action,
    node_id="",
    module_id=-1,
    target_node="",
    depth=1,
    category="",
)

Progressively explore the code graph to build context incrementally.

USE THIS TOOL: - ALWAYS start with "overview" action first - When you need to understand the codebase step by step - To get suggestions on where to explore next - To track what you've already explored

DO NOT USE: - For running analysis algorithms (use code_graph_analyze instead) - On an empty graph (ingest data first)

Progressive disclosure pattern: 1. "overview" → Get entry points, hotspots, modules, foundations 2. Pick interesting nodes from overview 3. "expand_node" → See neighbors and relationships 4. Repeat until sufficient context is gathered

The explorer tracks visited nodes and suggests what to explore next.

Actions:

Starting point: - "overview": Returns high-level structure. Includes: - entry_points: Where execution starts - hotspots: Bottleneck code (top 5) - modules: Detected clusters with key nodes - foundations: Core infrastructure (top 5) Always start here to orient yourself.

Drill-down: - "expand_node": BFS expansion from a node. See immediate neighbors and their relationships. Good for understanding a specific area. - "expand_module": Deep-dive into a detected module. Shows internal structure and external connections. - "category": Explore all nodes in a business logic category. Groups results by file.

Navigation: - "path": Find shortest path between two nodes. Useful for understanding how components connect. - "status": Check exploration coverage (% of nodes visited). - "reset": Clear exploration state to start fresh.

Parameters:

Name Type Description Default
graph_id str

ID of the graph to explore (must have data from ingestion)

required
action str

Exploration action. One of: - "overview": No additional params needed - "expand_node": Requires node_id, optional depth - "expand_module": Requires module_id (from overview/modules analysis) - "path": Requires node_id (source) and target_node - "category": Requires category (e.g., "db", "auth") - "status": No additional params - "reset": No additional params

required
node_id str

For "expand_node": Node ID to expand from. For "path": Source node. Format: "file_path:symbol_name"

''
module_id int

For "expand_module": Module ID from detect_modules results. Typically 0, 1, 2, etc. from the overview.

-1
target_node str

For "path": Destination node ID.

''
depth int

For "expand_node": How many hops to expand. - depth=1: Direct neighbors only (fast, focused) - depth=2: Neighbors of neighbors (broader context) - depth=3+: Rarely needed, can be large

1
category str

For "category": Business logic category name. Values from AST-grep: "db", "auth", "http", "validation", etc.

''

Returns:

Name Type Description
str

JSON with exploration results. Always includes "explored_count".

overview:

{"entry_points": [...], "hotspots": [...], "modules": [{"module_id": 0, "size": 15, "key_nodes": [...]}], "foundations": [...], "explored_count": 25}

expand_node:

{"center": "src/api.py:handler", "discovered_nodes": [...], "edges": [...], "suggested_next": [...], "explored_count": 40}

Output Size: 2-20KB depending on action and graph size

Workflow Example:

1. Start with overview

overview = code_graph_explore("main", "overview")

Look at entry_points and hotspots

2. Expand from interesting hotspot

details = code_graph_explore("main", "expand_node", node_id=overview["hotspots"][0]["id"], depth=2)

See neighbors and suggested_next

3. Explore a module

module_details = code_graph_explore("main", "expand_module", module_id=0)

See internal structure and external connections

4. Check coverage

status = code_graph_explore("main", "status")

coverage_percent shows how much of graph was explored

Source code in src/code_context_agent/tools/graph/tools.py
@tool
def code_graph_explore(  # noqa: C901
    graph_id: str,
    action: str,
    node_id: str = "",
    module_id: int = -1,
    target_node: str = "",
    depth: int = 1,
    category: str = "",
) -> str:
    """Progressively explore the code graph to build context incrementally.

    USE THIS TOOL:
    - ALWAYS start with "overview" action first
    - When you need to understand the codebase step by step
    - To get suggestions on where to explore next
    - To track what you've already explored

    DO NOT USE:
    - For running analysis algorithms (use code_graph_analyze instead)
    - On an empty graph (ingest data first)

    Progressive disclosure pattern:
    1. "overview" → Get entry points, hotspots, modules, foundations
    2. Pick interesting nodes from overview
    3. "expand_node" → See neighbors and relationships
    4. Repeat until sufficient context is gathered

    The explorer remembers visited nodes and suggests what to look at next.

    Actions:

    **Starting point:**
    - "overview": High-level structure — entry_points (where execution
      starts), hotspots (bottleneck code, top 5), modules (detected
      clusters with key nodes), foundations (core infrastructure, top 5).
      Always start here to orient yourself.

    **Drill-down:**
    - "expand_node": BFS expansion from a node; shows immediate neighbors
      and their relationships. Good for understanding a specific area.
    - "expand_module": Deep-dive into a detected module — internal
      structure plus external connections.
    - "category": All nodes in a business logic category, grouped by file.

    **Navigation:**
    - "path": Shortest path between two nodes; useful for seeing how
      components connect.
    - "status": Exploration coverage (% of nodes visited).
    - "reset": Clear exploration state to start fresh.

    Args:
        graph_id: ID of the graph to explore (must have data from ingestion)
        action: Exploration action. One of:
            - "overview": No additional params needed
            - "expand_node": Requires node_id, optional depth
            - "expand_module": Requires module_id (from overview/modules analysis)
            - "path": Requires node_id (source) and target_node
            - "category": Requires category (e.g., "db", "auth")
            - "status": No additional params
            - "reset": No additional params
        node_id: For "expand_node": Node ID to expand from.
            For "path": Source node. Format: "file_path:symbol_name"
        module_id: For "expand_module": Module ID from detect_modules
            results. Typically 0, 1, 2, etc. from the overview.
        target_node: For "path": Destination node ID.
        depth: For "expand_node": Number of hops to expand.
            - depth=1: Direct neighbors only (fast, focused)
            - depth=2: Neighbors of neighbors (broader context)
            - depth=3+: Rarely needed, can be large
        category: For "category": Business logic category name.
            Values from AST-grep: "db", "auth", "http", "validation", etc.

    Returns:
        JSON with exploration results. Always includes "explored_count".

        overview:
        {
            "entry_points": [...],
            "hotspots": [...],
            "modules": [{"module_id": 0, "size": 15, "key_nodes": [...]}],
            "foundations": [...],
            "explored_count": 25
        }

        expand_node:
        {
            "center": "src/api.py:handler",
            "discovered_nodes": [...],
            "edges": [...],
            "suggested_next": [...],  # What to explore next
            "explored_count": 40
        }

    Output Size: 2-20KB depending on action and graph size

    Workflow Example:

    # 1. Start with overview
    overview = code_graph_explore("main", "overview")

    # 2. Expand from interesting hotspot
    details = code_graph_explore("main", "expand_node",
                                  node_id=overview["hotspots"][0]["id"],
                                  depth=2)

    # 3. Explore a module
    module_details = code_graph_explore("main", "expand_module", module_id=0)

    # 4. Check coverage
    status = code_graph_explore("main", "status")
    """
    explorer = _get_explorer(graph_id)
    if explorer is None:
        return _json_response({"status": "error", "message": f"Graph not found: {graph_id}"})

    def _done(action_name, **payload):
        # Standard success envelope with the action echoed back.
        return _json_response({"status": "success", "action": action_name, **payload})

    def _fail(message):
        return _json_response({"status": "error", "message": message})

    if action == "overview":
        return _done("overview", **explorer.get_overview())

    if action == "expand_node":
        if not node_id:
            return _fail("node_id required for expand_node")
        return _done("expand_node", **explorer.expand_node(node_id, depth))

    if action == "expand_module":
        # module_id defaults to -1, so any negative value means "not given".
        if module_id < 0:
            return _fail("module_id required for expand_module")
        return _done("expand_module", **explorer.expand_module(module_id))

    if action == "path":
        if not node_id or not target_node:
            return _fail("node_id and target_node required for path")
        return _done("path", **explorer.get_path_between(node_id, target_node))

    if action == "category":
        if not category:
            return _fail("category required for category exploration")
        return _done("category", **explorer.explore_category(category))

    if action == "status":
        return _done("status", **explorer.get_exploration_status())

    if action == "reset":
        explorer.reset_exploration()
        return _done("reset", message="Exploration state reset")

    return _fail(f"Unknown action: {action}")

code_graph_export

code_graph_export(
    graph_id,
    format="json",
    include_metadata=True,
    max_nodes=100,
)

Export the code graph for visualization or external analysis.

USE THIS TOOL: - To generate Mermaid diagrams for CONTEXT.md architecture section - To save graph data for external visualization tools - After analysis, to capture the graph structure

DO NOT USE: - For persistence (use code_graph_save instead) - On empty graphs (ingest data first)

Export formats:

"mermaid" (recommended for documentation): Generates Mermaid diagram syntax that can be embedded in markdown. - Selects top nodes by degree (most connected = most important) - Uses shapes based on node type: - [name]: Classes (rectangles) - (name): Functions/methods (rounded) - [[name]]: Files (stadium shape) - Edge styles by relationship: - → : calls - -.-> : imports - ==> : inherits

"json" (for external tools): NetworkX node-link format. Can be loaded into other graph tools.

Parameters:

Name Type Description Default
graph_id str

ID of the graph to export (must exist)

required
format str

Export format: - "mermaid": Mermaid diagram syntax (for markdown embedding) - "json": NetworkX node-link JSON (for external tools)

'json'
include_metadata bool

For "json" format only. Whether to include node/edge metadata (file_path, line numbers, etc.). Set False for smaller output.

True
max_nodes int

For "mermaid" format only. Maximum nodes to include. Mermaid diagrams become unreadable with too many nodes. Recommended: 15 for CONTEXT.md, up to 50 for detailed diagrams. Nodes are selected by degree (most connected first).

100

Returns:

Type Description
For "mermaid":

{"status": "success", "format": "mermaid", "diagram": "graph TD\n    node1[Name] --> node2..."}

For "json":

{"status": "success", "format": "json", "graph": {"nodes": [...], "links": [...]}}

Output Size
  • mermaid: 1-5KB (limited by max_nodes)
  • json: 10-500KB (depends on graph size)

Workflow Example:

Export for CONTEXT.md architecture diagram

result = code_graph_export("main", format="mermaid", max_nodes=15) mermaid_code = result["diagram"]

Embed in markdown:

```mermaid

{mermaid_code}

```

Export for external visualization

result = code_graph_export("main", format="json", include_metadata=True)

Use with Gephi, D3.js, etc.

Source code in src/code_context_agent/tools/graph/tools.py
@tool
def code_graph_export(
    graph_id: str,
    format: str = "json",
    include_metadata: bool = True,
    max_nodes: int = 100,
) -> str:
    """Serialize a code graph as a Mermaid diagram or node-link JSON.

    WHEN TO CALL:
    - Producing an architecture diagram for CONTEXT.md ("mermaid")
    - Handing the graph to an external tool such as Gephi or D3.js ("json")

    WHEN NOT TO CALL:
    - To persist the graph between sessions (use code_graph_save instead)
    - On a graph with no ingested data

    The "mermaid" format keeps only the highest-degree nodes (most
    connected first, capped at max_nodes) and renders classes as
    rectangles [name], functions/methods rounded (name), and files as
    stadium shapes [[name]]; arrow styles encode the relationship
    (--> calls, -.-> imports, ==> inherits). The "json" format is
    NetworkX node-link data, optionally stripped of metadata.

    Args:
        graph_id: Identifier of an existing graph.
        format: Either "mermaid" or "json" (default "json").
        include_metadata: JSON only — keep per-node/per-edge metadata
            (file paths, line numbers). Pass False for smaller output.
        max_nodes: Mermaid only — cap on rendered nodes. Around 15 works
            well for CONTEXT.md; up to 50 for detailed diagrams.

    Returns:
        JSON string. On success it carries "format" plus either a
        "diagram" (mermaid text) or a "graph" (node-link dict); on
        failure, {"status": "error", "message": ...}.
    """
    graph = _get_graph(graph_id)
    if graph is None:
        return _json_response({"status": "error", "message": f"Graph not found: {graph_id}"})

    if format == "mermaid":
        diagram_text = _export_mermaid(graph, max_nodes)
        return _json_response({"status": "success", "format": "mermaid", "diagram": diagram_text})

    if format == "json":
        payload = graph.to_node_link_data()
        if not include_metadata:
            # Drop the bulky per-element metadata for a compact export.
            for element in payload.get("nodes", []) + payload.get("links", []):
                element.pop("metadata", None)
        return _json_response({"status": "success", "format": "json", "graph": payload})

    return _json_response({"status": "error", "message": f"Unknown format: {format}"})

code_graph_ingest_astgrep

code_graph_ingest_astgrep(
    graph_id, astgrep_result, result_type="rule_pack"
)

Add AST-grep pattern matches to the graph as categorized business logic nodes.

USE THIS TOOL: - After running astgrep_scan_rule_pack to add business logic patterns - After running astgrep_scan for custom pattern matches - When you want graph analysis to consider business logic categories

DO NOT USE: - Before code_graph_create (graph must exist first) - With empty AST-grep results (check match count first)

AST-grep matches become nodes with rich metadata: - category: "db", "auth", "http", "validation", etc. - severity: "error" (writes), "warning" (reads), "hint" (definitions) - rule_id: The specific pattern that matched

This metadata enables category-based analysis: - code_graph_analyze("main", "category", category="db") - code_graph_explore("main", "category", category="auth")

Parameters:

Name Type Description Default
graph_id str

ID of the target graph (must exist from code_graph_create)

required
astgrep_result str

The raw JSON string output from astgrep_scan or astgrep_scan_rule_pack. Pass the exact return value.

required
result_type str

Source of the AST-grep result: - "rule_pack" (default): From astgrep_scan_rule_pack. Results include category, severity, rule_id metadata. Use this for business logic detection. - "scan": From astgrep_scan ad-hoc patterns. Results have pattern info but no category metadata.

'rule_pack'

Returns:

Type Description
str

JSON with ingestion results:

str

{ "status": "success", "nodes_added": 25, "categories": ["db", "auth", "validation"], "total_nodes": 175

str

}

Output Size: ~300 bytes

Common Errors
  • "Graph not found": Call code_graph_create first
  • "Invalid JSON": AST-grep result is malformed
Workflow Example

Run rule pack for Python business logic

matches = astgrep_scan_rule_pack("py_business_logic", repo_path)

Ingest into graph

code_graph_ingest_astgrep("main", matches, "rule_pack")

Now analyze by category

db_ops = code_graph_analyze("main", "category", category="db") auth_ops = code_graph_analyze("main", "category", category="auth")

Source code in src/code_context_agent/tools/graph/tools.py
@tool
def code_graph_ingest_astgrep(
    graph_id: str,
    astgrep_result: str,
    result_type: str = "rule_pack",
) -> str:
    """Fold AST-grep pattern matches into the graph as business-logic nodes.

    WHEN TO CALL:
    - Right after astgrep_scan_rule_pack, to record categorized patterns
    - Right after astgrep_scan, to record ad-hoc pattern matches
    - Whenever graph analysis should see business-logic categories

    WHEN NOT TO CALL:
    - Before the graph exists (run code_graph_create first)
    - With an empty AST-grep result (check the match count first)

    Rule-pack matches carry rich metadata — category ("db", "auth",
    "http", "validation", ...), severity ("error" for writes, "warning"
    for reads, "hint" for definitions), and the matching rule_id — which
    enables category-scoped calls such as
    code_graph_analyze("main", "category", category="db").

    Args:
        graph_id: Identifier of an existing graph.
        astgrep_result: Exact JSON string returned by astgrep_scan or
            astgrep_scan_rule_pack.
        result_type: "rule_pack" (default; metadata-rich results from
            astgrep_scan_rule_pack) or "scan" (ad-hoc patterns from
            astgrep_scan, no category metadata).

    Returns:
        JSON string with nodes_added, the categories seen on new nodes,
        and the graph's updated node total.
    """
    graph = _get_graph(graph_id)
    if graph is None:
        return _json_response({"status": "error", "message": f"Graph not found: {graph_id}"})

    try:
        parsed = json.loads(astgrep_result)
    except json.JSONDecodeError as e:
        return _json_response({"status": "error", "message": f"Invalid JSON: {e}"})

    # Rule-pack output goes through the metadata-aware ingester;
    # anything else through the plain match ingester.
    if result_type == "rule_pack":
        candidates = ingest_astgrep_rule_pack(parsed)
    else:
        candidates = ingest_astgrep_matches(parsed)

    nodes_added = 0
    seen_categories: set[str] = set()
    for candidate in candidates:
        if graph.has_node(candidate.id):
            continue  # never overwrite a node that is already present
        graph.add_node(candidate)
        nodes_added += 1
        if "category" in candidate.metadata:
            seen_categories.add(candidate.metadata["category"])

    return _json_response(
        {
            "status": "success",
            "graph_id": graph_id,
            "result_type": result_type,
            "nodes_added": nodes_added,
            "categories": list(seen_categories),
            "total_nodes": graph.node_count,
        },
    )

code_graph_ingest_inheritance

code_graph_ingest_inheritance(
    graph_id, hover_content, class_node_id, file_path
)

Add class inheritance/implementation edges from LSP hover information.

USE THIS TOOL: - After lsp_hover on a class to capture extends/implements relationships - When building class hierarchy for OOP codebases - In DEEP mode for comprehensive type analysis

DO NOT USE: - On non-class symbols (functions, variables) - Without first creating the class node via code_graph_ingest_lsp

Parses class signatures to create edges: - "inherits" edges: class Foo extends Bar → Foo --inherits--> Bar - "implements" edges: class Foo implements IBar → Foo --implements--> IBar

Works with common patterns: - TypeScript/JavaScript: extends, implements - Python: class Foo(Bar, Baz) - Java: extends, implements

Parameters:

Name Type Description Default
graph_id str

ID of the target graph (must exist from code_graph_create)

required
hover_content str

The markdown/text content from lsp_hover result. Extract the "value" field from the hover response. Example: "class UserService extends BaseService implements IUserService"

required
class_node_id str

The node ID of the class in the graph. Format: "file_path:ClassName" (e.g., "src/services/user.ts:UserService") Must match the ID created by code_graph_ingest_lsp.

required
file_path str

Path to the file containing this class. Used to resolve base class locations.

required

Returns:

Name Type Description
JSON str

{"status": "success", "edges_added": N, "edge_types": ["inherits", "implements"]}

Output Size: ~200 bytes

Workflow Example

Get class symbols

symbols = lsp_document_symbols(session_id, "src/user.ts") code_graph_ingest_lsp("main", symbols, "symbols", source_file="src/user.ts")

For each class, get hover info and ingest inheritance

hover = lsp_hover(session_id, "src/user.ts", class_line, 0) hover_content = hover["hover"]["contents"]["value"] code_graph_ingest_inheritance("main", hover_content, "src/user.ts:UserService", "src/user.ts")

Source code in src/code_context_agent/tools/graph/tools.py
@tool
def code_graph_ingest_inheritance(
    graph_id: str,
    hover_content: str,
    class_node_id: str,
    file_path: str,
) -> str:
    """Record extends/implements edges parsed from lsp_hover output.

    WHEN TO CALL:
    - After lsp_hover on a class, to wire it to its bases/interfaces
    - While mapping class hierarchies in OOP-heavy codebases
    - In DEEP mode for comprehensive type analysis

    WHEN NOT TO CALL:
    - On hover text for non-class symbols (functions, variables)
    - Before the class node exists (create it via code_graph_ingest_lsp)

    The hover signature is parsed for TypeScript/JavaScript extends and
    implements clauses, Python base lists (class Foo(Bar, Baz)), and the
    equivalent Java keywords, yielding "inherits" and "implements"
    edges out of the class node.

    Args:
        graph_id: Identifier of an existing graph.
        hover_content: The "value" text from the lsp_hover response, e.g.
            "class UserService extends BaseService implements IUserService".
        class_node_id: Graph node ID of the class, "file_path:ClassName"
            (must match the ID created by code_graph_ingest_lsp).
        file_path: File containing the class; used to resolve where the
            base classes live.

    Returns:
        JSON string with edges_added, the possible edge types, and the
        graph's updated edge total.
    """
    graph = _get_graph(graph_id)
    if graph is None:
        return _json_response({"status": "error", "message": f"Graph not found: {graph_id}"})

    new_edges = list(ingest_inheritance(hover_content, class_node_id, file_path))
    for relation in new_edges:
        graph.add_edge(relation)

    return _json_response(
        {
            "status": "success",
            "graph_id": graph_id,
            "edges_added": len(new_edges),
            "edge_types": ["inherits", "implements"],
            "total_edges": graph.edge_count,
        },
    )

code_graph_ingest_lsp

code_graph_ingest_lsp(
    graph_id,
    lsp_result,
    result_type,
    source_file="",
    source_symbol="",
)

Add LSP tool results to the code graph as nodes and edges.

USE THIS TOOL: - After calling lsp_document_symbols to add function/class nodes - After calling lsp_references to add "references" edges (fan-in data) - After calling lsp_definition to add "calls" edges (call relationships)

DO NOT USE: - Before calling code_graph_create (graph must exist first) - With invalid/empty LSP results (check LSP tool status first)

Converts raw LSP data into graph structure: - "symbols" → Creates nodes for functions, classes, methods, variables - "references" → Creates edges showing where a symbol is used - "definition" → Creates edges showing what a symbol calls/uses

Parameters:

Name Type Description Default
graph_id str

ID of the target graph (must exist from code_graph_create)

required
lsp_result str

The raw JSON string output from an LSP tool. Pass the exact return value from lsp_document_symbols, lsp_references, or lsp_definition.

required
result_type str

Type of LSP result being ingested: - "symbols": From lsp_document_symbols. Creates nodes. REQUIRES source_file parameter. - "references": From lsp_references. Creates reference edges. REQUIRES source_symbol parameter (format: "file:name"). - "definition": From lsp_definition. Creates call/import edges.

required
source_file str

Required for "symbols" type. The file path that was analyzed (e.g., "src/main.py"). Used to create node IDs.

''
source_symbol str

Required for "references" type. The symbol ID that references point TO (format: "src/main.py:my_function").

''

Returns:

Type Description
str

JSON with ingestion results:

str

{ "status": "success", "nodes_added": 15, # New nodes created "edges_added": 8, # New edges created "total_nodes": 150, # Graph totals "total_edges": 200

str

}

Output Size: ~200 bytes

Common Errors
  • "Graph not found": Call code_graph_create first
  • "source_file required": Must provide source_file for "symbols"
  • "source_symbol required": Must provide source_symbol for "references"
  • "Invalid JSON": LSP result is malformed

Workflow Examples:

Ingesting symbols (creates nodes): symbols = lsp_document_symbols(session_id, "src/api.py") code_graph_ingest_lsp("main", symbols, "symbols", source_file="src/api.py")

Ingesting references (creates edges showing fan-in): refs = lsp_references(session_id, "src/api.py", 10, 5) code_graph_ingest_lsp("main", refs, "references", source_symbol="src/api.py:handle_request")

Ingesting definitions (creates call edges): defn = lsp_definition(session_id, "src/api.py", 15, 20) code_graph_ingest_lsp("main", defn, "definition")

Source code in src/code_context_agent/tools/graph/tools.py
@tool
def code_graph_ingest_lsp(  # noqa: C901
    graph_id: str,
    lsp_result: str,
    result_type: str,
    source_file: str = "",
    source_symbol: str = "",
) -> str:
    """Merge raw LSP tool output into the graph as nodes and/or edges.

    WHEN TO CALL:
    - After lsp_document_symbols (result_type="symbols") to add
      function/class/method/variable nodes
    - After lsp_references (result_type="references") to add fan-in edges
    - After lsp_definition (result_type="definition") to add call edges

    WHEN NOT TO CALL:
    - Before code_graph_create (the graph must already exist)
    - With invalid or empty LSP results (check the LSP tool status first)

    Args:
        graph_id: Identifier of an existing graph.
        lsp_result: Exact JSON string returned by lsp_document_symbols,
            lsp_references, or lsp_definition.
        result_type: One of:
            - "symbols": creates nodes; REQUIRES source_file.
            - "references": creates reference edges; REQUIRES
              source_symbol (format "file:name").
            - "definition": creates call/import edges.
        source_file: File that was analyzed (e.g. "src/main.py");
            required for "symbols" and used to build node IDs.
        source_symbol: Symbol ID the references point TO
            (format "src/main.py:my_function"); required for "references".

    Returns:
        JSON string reporting nodes_added / edges_added plus the graph's
        running node and edge totals, or an error message
        ("Graph not found", "source_file required for symbols",
        "source_symbol required for references", "Invalid JSON", or
        "Unknown result_type").
    """
    graph = _get_graph(graph_id)
    if graph is None:
        return _json_response({"status": "error", "message": f"Graph not found: {graph_id}"})

    try:
        parsed = json.loads(lsp_result)
    except json.JSONDecodeError as e:
        return _json_response({"status": "error", "message": f"Invalid JSON: {e}"})

    nodes_added = 0
    edges_added = 0

    if result_type == "symbols":
        if not source_file:
            return _json_response({"status": "error", "message": "source_file required for symbols"})
        new_nodes, new_edges = ingest_lsp_symbols(parsed, source_file)
        for sym_node in new_nodes:
            if graph.has_node(sym_node.id):
                continue  # keep the existing node untouched
            graph.add_node(sym_node)
            nodes_added += 1
        for sym_edge in new_edges:
            graph.add_edge(sym_edge)
            edges_added += 1

    elif result_type == "references":
        if not source_symbol:
            return _json_response({"status": "error", "message": "source_symbol required for references"})
        for ref_edge in ingest_lsp_references(parsed, source_symbol):
            graph.add_edge(ref_edge)
            edges_added += 1

    elif result_type == "definition":
        # The definition result itself carries the origin location;
        # fall back to the caller-supplied file when absent.
        origin_file = parsed.get("file", source_file)
        origin_line = parsed.get("position", {}).get("line", 0)
        for def_edge in ingest_lsp_definition(parsed, origin_file, origin_line):
            graph.add_edge(def_edge)
            edges_added += 1

    else:
        return _json_response({"status": "error", "message": f"Unknown result_type: {result_type}"})

    return _json_response(
        {
            "status": "success",
            "graph_id": graph_id,
            "result_type": result_type,
            "nodes_added": nodes_added,
            "edges_added": edges_added,
            "total_nodes": graph.node_count,
            "total_edges": graph.edge_count,
        },
    )

code_graph_ingest_rg

code_graph_ingest_rg(graph_id, rg_result)

Add ripgrep search matches to the graph as preliminary nodes.

USE THIS TOOL: - When LSP doesn't cover a language/pattern - For text-based patterns (SQL keywords, config values, comments) - As a fallback when semantic analysis isn't available

DO NOT USE: - When LSP symbols are available (prefer code_graph_ingest_lsp) - For structural patterns (prefer code_graph_ingest_astgrep)

Creates lightweight nodes from text matches. These nodes have: - file_path and line number - matched text content - No semantic type information (unlike LSP nodes)

Ripgrep nodes are useful for: - Finding TODO/FIXME comments - Locating hardcoded values - Identifying SQL queries in strings

Parameters:

Name Type Description Default
graph_id str

ID of the target graph (must exist from code_graph_create)

required
rg_result str

The raw JSON string output from rg_search tool. Pass the exact return value.

required

Returns:

Name Type Description
JSON str

{"status": "success", "nodes_added": N, "total_nodes": M}

Output Size: ~150 bytes

Workflow Example

Find all SQL queries

sql_matches = rg_search("SELECT|INSERT|UPDATE|DELETE", repo_path) code_graph_ingest_rg("main", sql_matches)

Source code in src/code_context_agent/tools/graph/tools.py
@tool
def code_graph_ingest_rg(
    graph_id: str,
    rg_result: str,
) -> str:
    """Turn ripgrep search hits into lightweight text-match graph nodes.

    WHEN TO CALL:
    - When LSP does not cover a language or pattern
    - For text-level patterns (SQL keywords, config values, comments)
    - As a fallback when semantic analysis is unavailable

    WHEN NOT TO CALL:
    - When LSP symbols are available (prefer code_graph_ingest_lsp)
    - For structural patterns (prefer code_graph_ingest_astgrep)

    Unlike LSP nodes, these carry no semantic type information — just
    the file path, line number, and matched text. Useful for locating
    TODO/FIXME comments, hardcoded values, or SQL inside strings.

    Args:
        graph_id: Identifier of an existing graph.
        rg_result: Exact JSON string returned by the rg_search tool.

    Returns:
        JSON string with nodes_added and the graph's updated node total.
    """
    graph = _get_graph(graph_id)
    if graph is None:
        return _json_response({"status": "error", "message": f"Graph not found: {graph_id}"})

    try:
        parsed = json.loads(rg_result)
    except json.JSONDecodeError as e:
        return _json_response({"status": "error", "message": f"Invalid JSON: {e}"})

    added = 0
    for match_node in ingest_rg_matches(parsed):
        if graph.has_node(match_node.id):
            continue  # skip duplicates already in the graph
        graph.add_node(match_node)
        added += 1

    return _json_response(
        {
            "status": "success",
            "graph_id": graph_id,
            "nodes_added": added,
            "total_nodes": graph.node_count,
        },
    )

code_graph_ingest_tests

code_graph_ingest_tests(
    graph_id, test_files, production_files
)

Add test-to-production file mappings as "tests" edges in the graph.

USE THIS TOOL: - After identifying test files (via rg_search for test patterns) - To enable test coverage analysis on business logic - To find untested hotspots in the codebase

DO NOT USE: - With unfiltered file lists (only include actual test files) - Before adding production file nodes to the graph

Creates "tests" edges based on naming convention matching: - test_foo.py → foo.py - foo.test.ts → foo.ts - FooTest.java → Foo.java - __tests__/foo.test.js → src/foo.js

These edges enable: - Finding untested business logic (nodes without incoming test edges) - Understanding test coverage per module - Prioritizing testing efforts on hotspots

Parameters:

Name Type Description Default
graph_id str

ID of the target graph (must exist from code_graph_create)

required
test_files str

JSON array of test file paths as a string. Example: '["tests/test_user.py", "tests/test_auth.py"]' Obtain from rg_search or file manifest filtering.

required
production_files str

JSON array of production file paths as a string. Example: '["src/user.py", "src/auth.py"]' Should include all files you want to map tests to.

required

Returns:

Name Type Description
JSON str

{"status": "success", "edges_added": N, "total_edges": M}

Output Size: ~150 bytes

Workflow Example

Find test files

test_matches = rg_search("def test_|it\(|describe\(", repo_path) test_files = extract_unique_files(test_matches)

Get production files from manifest

prod_files = filter_non_test_files(manifest)

Create test mapping edges

code_graph_ingest_tests("main", json.dumps(test_files), json.dumps(prod_files))

Find untested hotspots

hotspots = code_graph_analyze("main", "hotspots", top_k=10)

Check which have no incoming "tests" edges

Source code in src/code_context_agent/tools/graph/tools.py
@tool
def code_graph_ingest_tests(
    graph_id: str,
    test_files: str,
    production_files: str,
) -> str:
    """Link test files to production files with "tests" edges.

    WHEN TO CALL:
    - After identifying test files (e.g. via rg_search for test patterns)
    - To enable test-coverage analysis over business logic
    - To surface untested hotspots in the codebase

    WHEN NOT TO CALL:
    - With unfiltered file lists (pass only actual test files)
    - Before the production file nodes exist in the graph

    Matching is by naming convention: test_foo.py → foo.py,
    foo.test.ts → foo.ts, FooTest.java → Foo.java,
    __tests__/foo.test.js → src/foo.js. The resulting edges let you find
    untested business logic (nodes with no incoming "tests" edge) and
    prioritize testing effort on hotspots.

    Args:
        graph_id: Identifier of an existing graph.
        test_files: JSON array of test file paths, as a string, e.g.
            '["tests/test_user.py", "tests/test_auth.py"]'.
        production_files: JSON array of production file paths, as a
            string, e.g. '["src/user.py", "src/auth.py"]' — include
            every file you want tests mapped to.

    Returns:
        JSON string with edges_added and the graph's updated edge total.
    """
    graph = _get_graph(graph_id)
    if graph is None:
        return _json_response({"status": "error", "message": f"Graph not found: {graph_id}"})

    try:
        test_paths = json.loads(test_files)
        prod_paths = json.loads(production_files)
    except json.JSONDecodeError as e:
        return _json_response({"status": "error", "message": f"Invalid JSON: {e}"})

    mapping_edges = list(ingest_test_mapping(test_paths, prod_paths))
    for mapping_edge in mapping_edges:
        graph.add_edge(mapping_edge)

    return _json_response(
        {
            "status": "success",
            "graph_id": graph_id,
            "edges_added": len(mapping_edges),
            "total_edges": graph.edge_count,
        },
    )

code_graph_load

code_graph_load(graph_id, file_path)

Load a previously saved code graph from disk.

USE THIS TOOL: - At the start of a session if .code-context/code_graph.json exists - To resume analysis from a previous session - To skip re-running LSP/AST-grep data collection

DO NOT USE: - If graph file doesn't exist (check with file system first) - When you need fresh analysis (create new graph instead)

Loading a saved graph restores: - All nodes with their metadata - All edges with their types - Ready for immediate analysis (code_graph_analyze, code_graph_explore)

Note: Loading replaces any existing graph with the same ID. The explorer state is reset (tracked exploration cleared).

Parameters:

Name Type Description Default
graph_id str

ID to assign to the loaded graph. Use: - "main": For the primary codebase graph - Descriptive names for scoped graphs

required
file_path str

Path to the saved graph file. Standard location: ".code-context/code_graph.json"

required

Returns:

Name Type Description
JSON str

{ "status": "success", "graph_id": "main", "path": ".code-context/code_graph.json", "nodes": 150, "edges": 200

str

}

Output Size: ~100 bytes

Common Errors
  • "Load failed": File not found or invalid JSON

Workflow Example:

Check if saved graph exists

If .code-context/code_graph.json exists:

code_graph_load("main", ".code-context/code_graph.json")

Graph is ready for analysis

hotspots = code_graph_analyze("main", "hotspots") overview = code_graph_explore("main", "overview")

No need to re-run lsp_* or astgrep_* tools!

Source code in src/code_context_agent/tools/graph/tools.py
@tool
def code_graph_load(
    graph_id: str,
    file_path: str,
) -> str:
    """Restore a code graph that was previously persisted with code_graph_save.

    Reads the JSON node-link file at ``file_path``, rebuilds the graph, and
    registers it under ``graph_id``. Any graph already registered under that
    ID is replaced, and its explorer state (tracked exploration) is discarded.
    Loading a saved graph avoids re-running LSP/AST-grep data collection.

    Args:
        graph_id: ID to register the loaded graph under. Use "main" for the
            primary codebase graph, descriptive names for scoped graphs.
        file_path: Location of the saved graph file. Standard location:
            ".code-context/code_graph.json".

    Returns:
        JSON string. On success: {"status": "success", "graph_id", "path",
        "nodes", "edges"}. On failure: {"status": "error", "message": ...}
        (validation errors verbatim, everything else prefixed "Load failed").
    """
    try:
        from ..validation import ValidationError, validate_file_path

        try:
            resolved = validate_file_path(file_path)
        except ValidationError as exc:
            return _json_response({"status": "error", "message": str(exc)})
        loaded = CodeGraph.from_node_link_data(json.loads(resolved.read_text()))
        _graphs[graph_id] = loaded
        # Loading invalidates any exploration tracked for this graph ID.
        _explorers.pop(graph_id, None)
    except (OSError, ValueError, TypeError, KeyError) as exc:
        return _json_response({"status": "error", "message": f"Load failed: {exc}"})

    success_payload = {
        "status": "success",
        "graph_id": graph_id,
        "path": str(resolved),
        "nodes": loaded.node_count,
        "edges": loaded.edge_count,
    }
    return _json_response(success_payload)

code_graph_save

code_graph_save(graph_id, file_path)

Persist the code graph to disk for reuse in future sessions.

USE THIS TOOL: - After completing graph analysis (DEEP mode) - When you want to preserve analysis results - Before ending a session with valuable graph data

DO NOT USE: - For exporting to visualization formats (use code_graph_export) - On empty graphs (waste of disk space)

Saves the complete graph structure including: - All nodes with metadata (file_path, line numbers, categories) - All edges with types (calls, references, imports, inherits) - All analysis-relevant data

Saved graphs can be reloaded with code_graph_load, avoiding the need to re-run LSP/AST-grep tools.

Parameters:

Name Type Description Default
graph_id str

ID of the graph to save (must exist)

required
file_path str

Destination file path. Recommended locations: - ".code-context/code_graph.json": Standard location for main graph - ".code-context/{name}_graph.json": For named/scoped graphs Parent directories are created automatically.

required

Returns:

Name Type Description
JSON str

{ "status": "success", "graph_id": "main", "path": ".code-context/code_graph.json", "nodes": 150, "edges": 200

str

}

Output Size: ~100 bytes (file size varies: 10KB-1MB)

Common Errors
  • "Graph not found": Graph ID doesn't exist
  • "Save failed": File system error (permissions, disk full)

Workflow Example:

After comprehensive analysis in DEEP mode

code_graph_create("main")

... ingest LSP, AST-grep data ...

... run analysis ...

Save for future sessions

code_graph_save("main", ".code-context/code_graph.json")

In future session:

code_graph_load("main", ".code-context/code_graph.json")

Graph restored with all nodes/edges

Source code in src/code_context_agent/tools/graph/tools.py
@tool
def code_graph_save(
    graph_id: str,
    file_path: str,
) -> str:
    """Persist a code graph to disk so later sessions can reload it.

    Writes the full graph structure — every node with its metadata
    (file_path, line numbers, categories) and every typed edge (calls,
    references, imports, inherits) — as indented JSON. A saved graph can be
    restored with code_graph_load, skipping LSP/AST-grep re-ingestion. For
    visualization exports use code_graph_export instead.

    Args:
        graph_id: ID of an existing graph to save.
        file_path: Destination path; parent directories are created as
            needed. Convention: ".code-context/code_graph.json" for the main
            graph, ".code-context/{name}_graph.json" for scoped graphs.

    Returns:
        JSON string with status, graph_id, path, nodes, and edges on
        success; {"status": "error", "message": ...} when the graph is
        missing or the write fails.
    """
    target_graph = _get_graph(graph_id)
    if target_graph is None:
        return _json_response({"status": "error", "message": f"Graph not found: {graph_id}"})

    try:
        from ..validation import ValidationError, validate_file_path

        try:
            dest = validate_file_path(file_path, must_exist=False)
        except ValidationError as exc:
            return _json_response({"status": "error", "message": str(exc)})
        # Serialize first so a serialization failure leaves the filesystem untouched.
        payload = json.dumps(target_graph.to_node_link_data(), indent=2, default=str)
        dest.parent.mkdir(parents=True, exist_ok=True)
        dest.write_text(payload)
    except (OSError, ValueError, TypeError) as exc:
        return _json_response({"status": "error", "message": f"Save failed: {exc}"})

    return _json_response(
        {
            "status": "success",
            "graph_id": graph_id,
            "path": str(dest),
            "nodes": target_graph.node_count,
            "edges": target_graph.edge_count,
        },
    )

code_graph_stats

code_graph_stats(graph_id)

Get summary statistics about a code graph.

USE THIS TOOL: - To verify graph was populated correctly after ingestion - To understand graph composition before analysis - For the completion signal (graph node/edge counts)

DO NOT USE: - For detailed analysis (use code_graph_analyze) - For exploration (use code_graph_explore)

Returns counts broken down by type: - Nodes by type: function, class, method, variable, pattern_match - Edges by type: calls, references, imports, inherits, tests

This helps verify: - LSP ingestion worked (function/class nodes exist) - AST-grep ingestion worked (pattern_match nodes exist) - Reference tracking worked (references edges exist) - Test mapping worked (tests edges exist)

Parameters:

Name Type Description Default
graph_id str

ID of the graph to get stats for (must exist)

required

Returns:

Name Type Description
JSON str

{ "status": "success", "graph_id": "main", "total_nodes": 150, "total_edges": 200, "nodes_by_type": { "function": 80, "class": 20, "method": 40, "pattern_match": 10 }, "edges_by_type": { "calls": 100, "references": 60, "imports": 30, "tests": 10 }

str

}

Output Size: ~300 bytes

Workflow Example:

After ingestion, verify graph state

stats = code_graph_stats("main")

Check ingestion worked

if stats["nodes_by_type"]["function"] == 0: # LSP symbols not ingested properly

if stats["edges_by_type"]["references"] == 0: # LSP references not ingested

Use in completion signal

Graph: {stats["total_nodes"]} nodes, {stats["total_edges"]} edges

Source code in src/code_context_agent/tools/graph/tools.py
@tool
def code_graph_stats(
    graph_id: str,
) -> str:
    """Get summary statistics about a code graph.

    USE THIS TOOL:
    - To verify graph was populated correctly after ingestion
    - To understand graph composition before analysis
    - For the completion signal (graph node/edge counts)

    DO NOT USE:
    - For detailed analysis (use code_graph_analyze)
    - For exploration (use code_graph_explore)

    Returns counts broken down by type:
    - Nodes by type: function, class, method, variable, pattern_match
    - Edges by type: calls, references, imports, inherits, tests

    This helps verify:
    - LSP ingestion worked (function/class nodes exist)
    - AST-grep ingestion worked (pattern_match nodes exist)
    - Reference tracking worked (references edges exist)
    - Test mapping worked (tests edges exist)

    Args:
        graph_id: ID of the graph to get stats for (must exist)

    Returns:
        JSON: {
            "status": "success",
            "graph_id": "main",
            "total_nodes": 150,
            "total_edges": 200,
            "nodes_by_type": {"function": 80, "class": 20, ...},
            "edges_by_type": {"calls": 100, "references": 60, ...}
        }

    Output Size: ~300 bytes

    Common Errors:
        - "Graph not found": Graph ID doesn't exist
    """
    # Local import: stdlib-only, keeps this change self-contained.
    from collections import Counter

    graph = _get_graph(graph_id)
    if graph is None:
        return _json_response({"status": "error", "message": f"Graph not found: {graph_id}"})

    # Counter replaces the hand-rolled dict.get(..., 0) + 1 frequency counting.
    node_types = Counter(
        attrs.get("node_type", "unknown") for _, attrs in graph.nodes(data=True)
    )
    edge_types = Counter(
        attrs.get("edge_type", "unknown") for _, _, attrs in graph.edges(data=True)
    )

    return _json_response(
        {
            "status": "success",
            "graph_id": graph_id,
            "total_nodes": graph.node_count,
            "total_edges": graph.edge_count,
            # Plain dicts keep the JSON payload type identical to before.
            "nodes_by_type": dict(node_types),
            "edges_by_type": dict(edge_types),
        },
    )

lsp_definition async

lsp_definition(session_id, file_path, line, character)

Go to definition of symbol at position.

Use this to find where a symbol is defined, useful for tracing dependencies and understanding code structure.

Requires: Call lsp_start first to create a session.

Parameters:

Name Type Description Default
session_id str

Session ID from lsp_start.

required
file_path str

Absolute path to the file.

required
line int

0-indexed line number.

required
character int

0-indexed column number.

required

Returns:

Type Description
str

JSON array of Location objects pointing to definition(s).

Example

defn = await lsp_definition("ts:/path/repo", "/path/repo/src/index.ts", 5, 12)

Returns: [{"uri": "file:///path/repo/src/utils.ts", "range": {...}}]

Source code in src/code_context_agent/tools/lsp/tools.py
@tool
async def lsp_definition(session_id: str, file_path: str, line: int, character: int) -> str:
    """Resolve the definition site(s) of the symbol at a position.

    Useful for tracing dependencies and understanding code structure.
    Requires an active session created via lsp_start.

    Args:
        session_id: Session ID from lsp_start.
        file_path: Absolute path to the file.
        line: 0-indexed line number.
        character: 0-indexed column number.

    Returns:
        JSON with a "definitions" array of Location objects plus a "count",
        or an error payload if the session is missing or the request fails.
    """
    lsp_client = get_session_manager().get_session(session_id)
    if lsp_client is None:
        missing_session = {
            "status": "error",
            "error": f"No active LSP session: {session_id}. Call lsp_start first.",
        }
        return json.dumps(missing_session)

    try:
        resolved = str(Path(file_path).resolve())
        locations = await lsp_client.definition(resolved, line, character)
        success_payload = {
            "status": "success",
            "file": file_path,
            "position": {"line": line, "character": character},
            "definitions": locations,
            "count": len(locations),
        }
        return json.dumps(success_payload)
    except (OSError, TimeoutError, RuntimeError, ValueError) as exc:
        logger.error(f"LSP definition error: {exc}")
        return json.dumps({"status": "error", "error": str(exc)})

lsp_document_symbols async

lsp_document_symbols(session_id, file_path)

Get document symbol outline (functions, classes, methods, variables).

USE THIS TOOL: - To get a structural overview of a file without reading full contents - To find function/class names and their line ranges for targeted reading - To understand file organization before diving into implementation - To get accurate symbol positions for lsp_references or lsp_hover calls

DO NOT USE: - Before calling lsp_start (will fail with "No active LSP session") - For searching across multiple files (use rg_search instead) - For simple line counting or file metadata (use read_file_bounded)

Requires: Call lsp_start first to create a session.

Parameters:

Name Type Description Default
session_id str

Session ID from lsp_start (format: "kind:workspace").

required
file_path str

Absolute path to the file to analyze.

required

Returns:

Type Description
str

JSON with symbols array containing name, kind, range, and nested children.

Output Size: ~100-500 bytes per symbol. Typical file: 1-5KB response.

Symbol Kinds (common values): - 5: Class - 6: Method - 12: Function - 13: Variable - 14: Constant - 23: Struct

Common Errors
  • "No active LSP session": Call lsp_start first
  • Empty symbols array: File may not be parseable or have no exports
  • "File not found": Ensure file_path is absolute and exists
Example success

{"status": "success", "file": "/repo/src/index.ts", "symbols": [ {"name": "main", "kind": 12, "range": {"start": {"line": 10}}, "children": []} ], "count": 5}

Workflow tip
  1. Use lsp_document_symbols to find symbol names and line numbers
  2. Use read_file_bounded with start_line to read specific sections
  3. Use lsp_references to find where symbols are used
Source code in src/code_context_agent/tools/lsp/tools.py
@tool
async def lsp_document_symbols(session_id: str, file_path: str) -> str:
    """Return the symbol outline of a file (classes, functions, methods, ...).

    Gives a structural overview — names, LSP symbol kinds, ranges, and
    nested children — without reading the file's full contents. Use it to
    locate line ranges for targeted reads and to get accurate positions for
    lsp_references / lsp_hover calls. Requires an active session from
    lsp_start; for cross-file text search prefer rg_search.

    If the primary server reports no symbols, the next configured server is
    tried and the response's "fallback_used" flag is set.

    Args:
        session_id: Session ID from lsp_start (format: "kind:workspace").
        file_path: Absolute path to the file to analyze.

    Returns:
        JSON with "symbols" (possibly nested via children), "count", the
        "server" command used, and "fallback_used"; an error payload when
        no session exists or the request fails.
    """
    sessions = get_session_manager()
    lsp_client = sessions.get_session(session_id)
    if lsp_client is None:
        return json.dumps(
            {
                "status": "error",
                "error": f"No active LSP session: {session_id}. Call lsp_start first.",
            },
        )

    server_cmd = sessions.get_server_command_for_session(session_id) or "unknown"
    fallback_used = False

    try:
        symbols = await lsp_client.document_symbols(str(Path(file_path).resolve()))

        if not symbols:
            # Primary server found nothing — give the next configured server a chance.
            alternate = await _try_fallback_session(session_id, file_path)
            if alternate is not None:
                symbols, server_cmd = alternate
                fallback_used = True

        success_payload = {
            "status": "success",
            "file": file_path,
            "symbols": symbols,
            "count": len(symbols),
            "server": server_cmd,
            "fallback_used": fallback_used,
        }
        return json.dumps(success_payload)
    except (OSError, TimeoutError, RuntimeError, ValueError) as exc:
        logger.error(f"LSP document_symbols error: {exc}")
        return json.dumps({"status": "error", "error": str(exc)})

lsp_hover async

lsp_hover(session_id, file_path, line, character)

Get hover information at a position (docstrings, JSDoc, type info).

Retrieves documentation and type information for the symbol at the given position. This is how you extract docstrings and JSDoc comments.

Requires: Call lsp_start first to create a session.

Args: session_id: Session ID from lsp_start. file_path: Absolute path to the file. line: 0-indexed line number. character: 0-indexed column number.

Returns: JSON object with hover contents (often includes markdown documentation).

Example: >>> hover = await lsp_hover("ts:/path/repo", "/path/repo/src/utils.ts", 10, 5) >>> # Returns: {"contents": {"kind": "markdown", "value": "/** * Utility function..."}}

Source code in src/code_context_agent/tools/lsp/tools.py
@tool
async def lsp_hover(session_id: str, file_path: str, line: int, character: int) -> str:
    """Fetch hover info (docstrings, JSDoc, type signatures) at a position.

    This is the primary way to extract documentation attached to a symbol.
    Requires an active session created via lsp_start.

    Args:
        session_id: Session ID from lsp_start.
        file_path: Absolute path to the file.
        line: 0-indexed line number.
        character: 0-indexed column number.

    Returns:
        JSON with a "hover" payload (often markdown contents), or an error
        payload if the session is missing or the request fails.
    """
    lsp_client = get_session_manager().get_session(session_id)
    if lsp_client is None:
        missing_session = {
            "status": "error",
            "error": f"No active LSP session: {session_id}. Call lsp_start first.",
        }
        return json.dumps(missing_session)

    try:
        resolved = str(Path(file_path).resolve())
        hover_info = await lsp_client.hover(resolved, line, character)
        success_payload = {
            "status": "success",
            "file": file_path,
            "position": {"line": line, "character": character},
            "hover": hover_info,
        }
        return json.dumps(success_payload)
    except (OSError, TimeoutError, RuntimeError, ValueError) as exc:
        logger.error(f"LSP hover error: {exc}")
        return json.dumps({"status": "error", "error": str(exc)})

lsp_references async

lsp_references(
    session_id,
    file_path,
    line,
    character,
    include_declaration=True,
)

Find all references to symbol at position (fan-in analysis).

Use this to understand how widely a symbol is used across the codebase. The number of unique referencing files indicates the symbol's centrality.

Requires: Call lsp_start first to create a session.

Parameters:

Name Type Description Default
session_id str

Session ID from lsp_start.

required
file_path str

Absolute path to the file.

required
line int

0-indexed line number.

required
character int

0-indexed column number.

required
include_declaration bool

Whether to include the declaration itself.

True

Returns:

Type Description
str

JSON array of Location objects with uri and range.

Example

refs = await lsp_references("ts:/path/repo", "/path/repo/src/api.ts", 25, 10)

Returns: [{"uri": "file:///path/repo/src/handler.ts", "range": {...}}, ...]

Source code in src/code_context_agent/tools/lsp/tools.py
@tool
async def lsp_references(
    session_id: str,
    file_path: str,
    line: int,
    character: int,
    include_declaration: bool = True,
) -> str:
    """List every reference to the symbol at a position (fan-in analysis).

    The number of distinct referencing files is reported alongside the raw
    locations; a high unique-file count indicates a central symbol.
    Requires an active session created via lsp_start.

    Args:
        session_id: Session ID from lsp_start.
        file_path: Absolute path to the file.
        line: 0-indexed line number.
        character: 0-indexed column number.
        include_declaration: Whether to include the declaration itself.

    Returns:
        JSON with "references" (Location objects), "total_count", and
        "unique_files"; an error payload on failure.
    """
    lsp_client = get_session_manager().get_session(session_id)
    if lsp_client is None:
        missing_session = {
            "status": "error",
            "error": f"No active LSP session: {session_id}. Call lsp_start first.",
        }
        return json.dumps(missing_session)

    try:
        resolved = str(Path(file_path).resolve())
        locations = await lsp_client.references(resolved, line, character, include_declaration)

        # Fan-in metric: distinct files containing at least one reference.
        referencing_files = {uri for loc in locations if (uri := loc.get("uri", ""))}

        success_payload = {
            "status": "success",
            "file": file_path,
            "position": {"line": line, "character": character},
            "references": locations,
            "total_count": len(locations),
            "unique_files": len(referencing_files),
        }
        return json.dumps(success_payload)
    except (OSError, TimeoutError, RuntimeError, ValueError) as exc:
        logger.error(f"LSP references error: {exc}")
        return json.dumps({"status": "error", "error": str(exc)})

lsp_shutdown async

lsp_shutdown(session_id)

Shutdown an LSP server session.

Use this when you're done with a workspace to free resources. Sessions are automatically cleaned up when the agent finishes, but explicit shutdown is more efficient.

Parameters:

Name Type Description Default
session_id str

Session ID from lsp_start.

required

Returns:

Type Description
str

JSON object with shutdown status.

Example

await lsp_shutdown("ts:/path/repo")

Returns: {"status": "success", "message": "Session shutdown"}

Source code in src/code_context_agent/tools/lsp/tools.py
@tool
async def lsp_shutdown(session_id: str) -> str:
    """Tear down an LSP server session and free its resources.

    Sessions are cleaned up automatically when the agent finishes, but an
    explicit shutdown releases memory sooner.

    Args:
        session_id: Session ID from lsp_start.

    Returns:
        JSON with status "success" when a session was shut down, or status
        "warning" when no session with that ID was active.
    """
    was_shut_down = await get_session_manager().shutdown_session(session_id)

    if not was_shut_down:
        return json.dumps(
            {
                "status": "warning",
                "message": f"No active session found: {session_id}",
            },
        )
    return json.dumps(
        {
            "status": "success",
            "message": f"LSP session shutdown: {session_id}",
        },
    )

lsp_start async

lsp_start(server_kind, workspace_path)

Start an LSP server and initialize workspace for semantic analysis.

USE THIS TOOL: - Before using any other lsp_* tools (required prerequisite) - Once per language per workspace (session is reused automatically) - When you need semantic analysis: symbols, references, definitions, hover

DO NOT USE: - If you already started an LSP session for this language/workspace combo - For simple text searches (use rg_search instead - much faster) - For repos without the target language (e.g., don't start "ts" for Python-only repo)

Supported server kinds: - "ts": TypeScript/JavaScript (typescript-language-server) - "py": Python (ty server, with pyright-langserver fallback)

Parameters:

Name Type Description Default
server_kind str

Server type - "ts" for TypeScript/JS, "py" for Python.

required
workspace_path str

Absolute path to the repository/workspace root.

required

Returns:

Type Description
str

Session ID (format: "kind:path") for use in subsequent LSP calls.

Output Size: ~150 bytes JSON response.

Resource Usage
  • Memory: 100-500MB per server (depends on project size)
  • Startup time: 2-30 seconds (indexing dependent)
  • Sessions persist until lsp_shutdown or agent exit
Common Errors
  • "typescript-language-server not found": npm install -g typescript-language-server
  • "ty not found": uv tool install ty or pip install ty
  • Timeout: Large projects may exceed startup_timeout, increase in config
  • "No tsconfig.json": TypeScript server needs tsconfig.json in workspace
Example success

{"status": "success", "session_id": "ts:/path/repo", "message": "LSP server started..."}

Workflow
  1. lsp_start("ts", "/repo") # Start once
  2. lsp_document_symbols(session_id, file) # Use many times
  3. lsp_references(session_id, file, line, char)
  4. lsp_shutdown(session_id) # Optional cleanup
Source code in src/code_context_agent/tools/lsp/tools.py
@tool
async def lsp_start(server_kind: str, workspace_path: str) -> str:
    """Start (or reuse) an LSP server session for semantic analysis.

    Must be called before any other lsp_* tool. One session per language
    per workspace is enough — repeat calls for the same combination reuse
    the running server. Supported kinds: "ts" (typescript-language-server)
    and "py" (ty, with pyright-langserver fallback); "typescript",
    "javascript", and "python" are accepted as aliases. For plain text
    search use rg_search instead — it is much faster.

    Args:
        server_kind: "ts" for TypeScript/JS, "py" for Python (aliases ok).
        workspace_path: Absolute path to the repository/workspace root.

    Returns:
        JSON with the "session_id" (format "kind:workspace") to pass to
        subsequent lsp_* calls plus the underlying server command. On
        startup failure, an error payload marking the failure as critical.
    """
    manager = get_session_manager()
    workspace = str(Path(workspace_path).resolve())
    startup_timeout = get_settings().lsp_startup_timeout

    try:
        await manager.get_or_create(server_kind, workspace, startup_timeout=startup_timeout)
    except (OSError, TimeoutError, RuntimeError, ValueError) as exc:
        logger.error(f"LSP server failed to start: {exc}")
        failure_payload = {
            "status": "error",
            "error": str(exc),
            "message": f"LSP server failed to start for {server_kind}. "
            "This is a CRITICAL FAILURE - do not proceed without LSP. "
            "Fix the issue and retry.",
        }
        return json.dumps(failure_payload)

    # Canonicalize the kind so equivalent spellings map to one session ID.
    aliases = {"typescript": "ts", "python": "py", "javascript": "ts"}
    normalized = server_kind.lower()
    session_id = f"{aliases.get(normalized, normalized)}:{workspace}"

    return json.dumps(
        {
            "status": "success",
            "session_id": session_id,
            "server": manager.get_server_command_for_session(session_id) or server_kind,
            "message": f"LSP server started for {server_kind} at {workspace}",
        },
    )

run_command

run_command(
    cmd,
    cwd=None,
    timeout=120,
    max_output=100000,
    input_data=None,
)

Run shell command with bounds.

Uses shell=False with shlex parsing for security. For commands requiring shell features (pipes, redirects), pass a list like ["sh", "-c", "cmd"].

Parameters:

Name Type Description Default
cmd str | list[str]

Command string or list of arguments.

required
cwd str | None

Working directory.

None
timeout int

Maximum execution time in seconds.

120
max_output int

Maximum characters to capture.

100000
input_data str | None

Optional string to send to stdin.

None

Returns:

Type Description
CommandResult

Dict with status, stdout, stderr, return_code, and truncated flag.

Source code in src/code_context_agent/tools/shell.py
def run_command(
    cmd: str | list[str],
    cwd: str | None = None,
    timeout: int = 120,
    max_output: int = 100_000,
    input_data: str | None = None,
) -> CommandResult:
    """Run shell command with bounds.

    Uses shell=False with shlex parsing for security. For commands requiring
    shell features (pipes, redirects), pass a list like ["sh", "-c", "cmd"].

    Args:
        cmd: Command string or list of arguments.
        cwd: Working directory.
        timeout: Maximum execution time in seconds.
        max_output: Maximum characters to capture.
        input_data: Optional string to send to stdin.

    Returns:
        Dict with status, stdout, stderr, return_code, and truncated flag.
    """
    # A string is tokenized safely with shlex; a list is passed through as-is.
    argv = cmd if isinstance(cmd, list) else shlex.split(cmd)

    def _failure(reason):
        # Uniform shape for any execution failure (timeout, spawn error, ...).
        return {
            "status": "error",
            "stdout": "",
            "stderr": reason,
            "return_code": -1,
            "truncated": False,
        }

    try:
        proc = subprocess.run(
            argv,
            cwd=cwd,
            capture_output=True,
            text=True,
            timeout=timeout,
            input=input_data,
        )
    except subprocess.TimeoutExpired:
        return _failure(f"Command timed out after {timeout}s")
    except (subprocess.SubprocessError, OSError) as e:
        return _failure(str(e))

    full_stdout = proc.stdout
    clipped = len(full_stdout) > max_output
    captured = full_stdout[:max_output]
    if clipped:
        # Note how much output existed so the caller knows data was dropped.
        captured += f"\n... (truncated, {len(full_stdout)} total chars)"

    return {
        "status": "success" if proc.returncode == 0 else "error",
        "stdout": captured,
        "stderr": proc.stderr[:10000],
        "return_code": proc.returncode,
        "truncated": clipped,
    }

validate_file_path

validate_file_path(path, must_exist=True)

Validate file path is safe.

Parameters:

Name Type Description Default
path str

User-provided file path.

required
must_exist bool

If True, file must exist.

True

Returns:

Type Description
Path

Resolved Path object.

Raises:

Type Description
ValidationError

If path is dangerous or invalid.

Source code in src/code_context_agent/tools/validation.py
def validate_file_path(path: str, must_exist: bool = True) -> Path:
    """Validate file path is safe.

    Args:
        path: User-provided file path.
        must_exist: If True, file must exist.

    Returns:
        Resolved Path object.

    Raises:
        ValidationError: If path is dangerous or invalid.
    """
    resolved = Path(path).resolve()

    # Prevent path traversal. Check path *components* rather than the raw
    # string so legitimate names that merely contain consecutive dots
    # (e.g. "a..b.txt") are not falsely rejected.
    if ".." in Path(path).parts:
        raise ValidationError(f"Path traversal detected: {path}")

    # Must exist if required
    if must_exist and not resolved.exists():
        raise ValidationError(f"File does not exist: {path}")

    # Must be file if exists
    if resolved.exists() and not resolved.is_file():
        raise ValidationError(f"Path is not a file: {path}")

    return resolved

validate_glob_pattern

validate_glob_pattern(pattern)

Validate glob pattern is safe.

Parameters:

Name Type Description Default
pattern str

User-provided glob pattern.

required

Returns:

Type Description
str

Validated pattern string.

Raises:

Type Description
ValidationError

If pattern contains dangerous characters.

Source code in src/code_context_agent/tools/validation.py
def validate_glob_pattern(pattern: str) -> str:
    """Validate glob pattern is safe.

    Args:
        pattern: User-provided glob pattern.

    Returns:
        Validated pattern string.

    Raises:
        ValidationError: If pattern contains dangerous characters.
    """
    # Shell metacharacters that could enable command injection if the
    # pattern were ever handed to a subprocess.
    forbidden = ";&|`$(){}\\"
    if any(ch in forbidden for ch in pattern):
        raise ValidationError(f"Invalid characters in pattern: {pattern}")

    # Reject any pattern that tries to climb out of the search root.
    if ".." in pattern:
        raise ValidationError(f"Path traversal in pattern: {pattern}")

    return pattern

validate_path_within_repo

validate_path_within_repo(path, repo_root)

Validate that a path is contained within the repository root.

Parameters:

Name Type Description Default
path str

Path to validate.

required
repo_root str

Repository root path.

required

Returns:

Type Description
Path

Resolved Path object.

Raises:

Type Description
ValidationError

If path escapes the repository root.

Source code in src/code_context_agent/tools/validation.py
def validate_path_within_repo(path: str, repo_root: str) -> Path:
    """Validate that a path is contained within the repository root.

    Args:
        path: Path to validate.
        repo_root: Repository root path.

    Returns:
        Resolved Path object.

    Raises:
        ValidationError: If path escapes the repository root.
    """
    resolved = Path(path).resolve()
    root = Path(repo_root).resolve()
    # Path.is_relative_to performs the containment test portably (the old
    # string-prefix check hard-coded "/" and broke on Windows separators)
    # and accepts the root itself as contained.
    if not resolved.is_relative_to(root):
        raise ValidationError(f"Path {resolved} is outside repository root {root}")
    return resolved

validate_repo_path

validate_repo_path(path)

Validate repository path is safe to use.

Parameters:

Name Type Description Default
path str

User-provided path string.

required

Returns:

Type Description
Path

Resolved Path object.

Raises:

Type Description
ValidationError

If path is dangerous or invalid.

Example

>>> validate_repo_path("/home/user/project")
PosixPath('/home/user/project')

Source code in src/code_context_agent/tools/validation.py
def validate_repo_path(path: str) -> Path:
    """Validate repository path is safe to use.

    Args:
        path: User-provided path string.

    Returns:
        Resolved Path object.

    Raises:
        ValidationError: If path is dangerous or invalid.

    Example:
        >>> validate_repo_path("/home/user/project")
        PosixPath('/home/user/project')
    """
    resolved = Path(path).resolve()

    # Prevent path traversal. Check path *components* rather than the raw
    # string so directory names that merely contain consecutive dots
    # (e.g. "build..cache") are not falsely rejected.
    if ".." in Path(path).parts:
        raise ValidationError(f"Path traversal detected: {path}")

    # Prevent system paths
    dangerous = {"/", "/etc", "/usr", "/var", "/bin", "/sbin", "/root", "/boot"}
    if str(resolved) in dangerous:
        raise ValidationError(f"Dangerous system path: {path}")

    # Must exist and be directory
    if not resolved.exists():
        raise ValidationError(f"Path does not exist: {path}")
    if not resolved.is_dir():
        raise ValidationError(f"Path is not a directory: {path}")

    return resolved

validate_search_pattern

validate_search_pattern(pattern, max_length=1000)

Validate search pattern (regex) is safe.

Parameters:

Name Type Description Default
pattern str

User-provided search pattern.

required
max_length int

Maximum allowed pattern length.

1000

Returns:

Type Description
str

Validated pattern string.

Raises:

Type Description
ValidationError

If pattern is invalid or too long.

Source code in src/code_context_agent/tools/validation.py
def validate_search_pattern(pattern: str, max_length: int = 1000) -> str:
    """Validate search pattern (regex) is safe.

    Args:
        pattern: User-provided search pattern.
        max_length: Maximum allowed pattern length.

    Returns:
        Validated pattern string.

    Raises:
        ValidationError: If pattern is invalid or too long.
    """
    # Bound the pattern size before doing any regex work.
    pattern_len = len(pattern)
    if pattern_len > max_length:
        raise ValidationError(f"Pattern too long: {pattern_len} > {max_length}")

    # Compiling up front surfaces syntax errors here rather than at use time.
    try:
        re.compile(pattern)
    except re.error as exc:
        raise ValidationError(f"Invalid regex pattern: {exc}") from exc

    return pattern