Add Deepgram MCP Server - speech-to-text and TTS

Python FastMCP server wrapping Deepgram API for audio transcription and text-to-speech. Supports 125+ multilingual voices, large file chunking via FFmpeg, formatted markdown output with speaker diarization, and Docker deployment on port 8009. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 15:17:52 +01:00
parent ea5775da25
commit 0ba2896565
13 changed files with 1583 additions and 0 deletions
--- a/deepgram-mcp/.env.example
+++ b/deepgram-mcp/.env.example
@@ -0,0 +1 @@
+DEEPGRAM_API_KEY=your_api_key_here
--- a/deepgram-mcp/Dockerfile
+++ b/deepgram-mcp/Dockerfile
@@ -0,0 +1,21 @@
+FROM python:3.11-slim
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends ffmpeg curl && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY src/ src/
+
+ENV PYTHONPATH=/app/src
+
+EXPOSE 8009
+
+HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
+    CMD curl -f http://localhost:8009/health || exit 1
+
+CMD ["python", "-m", "deepgram_mcp.server"]
--- a/deepgram-mcp/docker-compose.yml
+++ b/deepgram-mcp/docker-compose.yml
@@ -0,0 +1,21 @@
+services:
+  deepgram-mcp:
+    build: .
+    container_name: deepgram-mcp
+    restart: unless-stopped
+    ports:
+      - "8009:8009"
+    volumes:
+      - deepgram-uploads:/data/uploads
+      - deepgram-tts:/data/tts_output
+    env_file:
+      - .env
+    environment:
+      - UPLOAD_DIR=/data/uploads
+      - TTS_DIR=/data/tts_output
+      - HOST=0.0.0.0
+      - PORT=8009
+
+volumes:
+  deepgram-uploads:
+  deepgram-tts:
--- a/deepgram-mcp/requirements.txt
+++ b/deepgram-mcp/requirements.txt
@@ -0,0 +1,7 @@
+fastmcp>=2.0.0
+httpx
+aiofiles
+python-dotenv
+python-multipart
+starlette
+uvicorn
--- a/deepgram-mcp/src/deepgram_mcp/init.py
+++ b/deepgram-mcp/src/deepgram_mcp/init.py
@@ -0,0 +1 @@
+# Deepgram MCP Server
--- a/deepgram-mcp/src/deepgram_mcp/pycache/init.cpython-312.pyc
+++ b/deepgram-mcp/src/deepgram_mcp/pycache/init.cpython-312.pyc
--- a/deepgram-mcp/src/deepgram_mcp/pycache/formatter.cpython-312.pyc
+++ b/deepgram-mcp/src/deepgram_mcp/pycache/formatter.cpython-312.pyc
--- a/deepgram-mcp/src/deepgram_mcp/file_manager.py
+++ b/deepgram-mcp/src/deepgram_mcp/file_manager.py
@@ -0,0 +1,101 @@
+"""File upload, download, and listing management for Deepgram MCP server."""
+
+import os
+import re
+from datetime import datetime, timezone
+from pathlib import Path
+
+import aiofiles
+
+UPLOAD_DIR = Path(os.getenv("UPLOAD_DIR", "/data/uploads"))
+TTS_DIR = Path(os.getenv("TTS_DIR", "/data/tts_output"))
+
+UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
+TTS_DIR.mkdir(parents=True, exist_ok=True)
+
+
+def _sanitize_filename(filename: str) -> str:
+    """Strip path components and dangerous characters from a filename."""
+    # Take only the basename (no directory traversal)
+    name = Path(filename).name
+    # Remove any remaining path separators or null bytes
+    name = re.sub(r'[/\\:\x00]', '', name)
+    # Collapse whitespace
+    name = re.sub(r'\s+', '_', name.strip())
+    if not name:
+        name = "unnamed_file"
+    return name
+
+
+def _timestamp_prefix() -> str:
+    """Generate a timestamp prefix for collision avoidance."""
+    return datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
+
+
+async def save_upload(filename: str, content: bytes) -> dict:
+    """Save uploaded file content with a timestamp prefix to avoid collisions.
+
+    Returns dict with filename, path, and size_mb.
+    """
+    safe_name = _sanitize_filename(filename)
+    prefixed_name = f"{_timestamp_prefix()}_{safe_name}"
+    dest = UPLOAD_DIR / prefixed_name
+
+    async with aiofiles.open(dest, "wb") as f:
+        await f.write(content)
+
+    size_mb = round(dest.stat().st_size / (1024 * 1024), 2)
+    return {
+        "filename": prefixed_name,
+        "path": str(dest),
+        "size_mb": size_mb,
+    }
+
+
+def list_files(directory: Path) -> list[dict]:
+    """List files in a directory with name, size_mb, and modified date."""
+    if not directory.is_dir():
+        return []
+
+    files = []
+    for entry in sorted(directory.iterdir()):
+        if entry.is_file():
+            stat = entry.stat()
+            files.append({
+                "name": entry.name,
+                "size_mb": round(stat.st_size / (1024 * 1024), 2),
+                "modified": datetime.fromtimestamp(
+                    stat.st_mtime, tz=timezone.utc
+                ).isoformat(),
+            })
+    return files
+
+
+def delete_file(directory: Path, filename: str) -> bool:
+    """Delete a file from the given directory. Returns True on success."""
+    safe_name = _sanitize_filename(filename)
+    target = directory / safe_name
+
+    # Ensure the resolved path is still within the directory
+    try:
+        target.resolve().relative_to(directory.resolve())
+    except ValueError:
+        return False
+
+    if target.is_file():
+        target.unlink()
+        return True
+    return False
+
+
+def get_file_path(directory: Path, filename: str) -> Path | None:
+    """Return the full path if the file exists in the directory, else None."""
+    safe_name = _sanitize_filename(filename)
+    target = directory / safe_name
+
+    try:
+        target.resolve().relative_to(directory.resolve())
+    except ValueError:
+        return None
+
+    return target if target.is_file() else None
--- a/deepgram-mcp/src/deepgram_mcp/formatter.py
+++ b/deepgram-mcp/src/deepgram_mcp/formatter.py
@@ -0,0 +1,332 @@
+"""Format Deepgram JSON responses into readable markdown."""
+
+from __future__ import annotations
+
+
+def format_timestamp(seconds: float) -> str:
+    """Format seconds into H:MM:SS or M:SS."""
+    total = int(seconds)
+    h, remainder = divmod(total, 3600)
+    m, s = divmod(remainder, 60)
+    if h > 0:
+        return f"{h}:{m:02d}:{s:02d}"
+    return f"{m}:{s:02d}"
+
+
+def format_duration(seconds: float) -> str:
+    """Format seconds into human-readable duration like '5m 32s' or '1h 5m 32s'."""
+    total = int(seconds)
+    h, remainder = divmod(total, 3600)
+    m, s = divmod(remainder, 60)
+    parts: list[str] = []
+    if h > 0:
+        parts.append(f"{h}h")
+    if m > 0 or h > 0:
+        parts.append(f"{m}m")
+    parts.append(f"{s}s")
+    return " ".join(parts)
+
+
+def truncate_result(text: str, max_chars: int = 80000) -> tuple[str, bool]:
+    """Truncate text at last newline before limit if too long."""
+    if len(text) <= max_chars:
+        return text, False
+    truncated = text[:max_chars]
+    last_newline = truncated.rfind("\n")
+    if last_newline > 0:
+        truncated = truncated[:last_newline]
+    truncated += "\n\n---\n*[Truncated - full transcript saved to file]*"
+    return truncated, True
+
+
+def format_transcription(response: dict, include_timestamps: bool = True) -> str:
+    """Format a Deepgram transcription response into readable markdown.
+
+    Args:
+        response: Raw Deepgram JSON response dict.
+        include_timestamps: Whether to include timestamps in transcript output.
+
+    Returns:
+        Formatted markdown string.
+    """
+    sections: list[str] = []
+
+    metadata = response.get("metadata") or {}
+    results = response.get("results") or {}
+    channels = results.get("channels") or []
+    first_alt = {}
+    if channels:
+        alts = channels[0].get("alternatives") or []
+        if alts:
+            first_alt = alts[0]
+
+    # --- Metadata header ---
+    section = _format_metadata(metadata, first_alt)
+    if section:
+        sections.append(section)
+
+    # --- Transcript ---
+    utterances = results.get("utterances")
+    section = _format_transcript(first_alt, utterances, include_timestamps)
+    if section:
+        sections.append(section)
+
+    # --- Summary ---
+    section = _format_summaries(first_alt)
+    if section:
+        sections.append(section)
+
+    # --- Topics ---
+    section = _format_topics(first_alt)
+    if section:
+        sections.append(section)
+
+    # --- Entities ---
+    section = _format_entities(first_alt)
+    if section:
+        sections.append(section)
+
+    # --- Sentiment ---
+    section = _format_sentiment(first_alt)
+    if section:
+        sections.append(section)
+
+    # --- Intents ---
+    section = _format_intents(first_alt)
+    if section:
+        sections.append(section)
+
+    # --- Search Results ---
+    section = _format_search(first_alt)
+    if section:
+        sections.append(section)
+
+    return "\n\n".join(sections)
+
+
+def _format_metadata(metadata: dict, first_alt: dict) -> str:
+    """Build the metadata header section."""
+    lines = ["## Transcription Results"]
+
+    duration = metadata.get("duration")
+    if duration is not None:
+        lines.append(f"- **Duration:** {format_duration(duration)}")
+
+    model_info = metadata.get("model_info")
+    if model_info and isinstance(model_info, dict):
+        for info in model_info.values():
+            name = info.get("name") if isinstance(info, dict) else None
+            if name:
+                lines.append(f"- **Model:** {name}")
+                break
+
+    confidence = first_alt.get("confidence")
+    if confidence is not None:
+        lines.append(f"- **Confidence:** {confidence * 100:.1f}%")
+
+    num_channels = metadata.get("channels")
+    if num_channels is not None:
+        lines.append(f"- **Channels:** {num_channels}")
+
+    return "\n".join(lines)
+
+
+def _format_transcript(
+    first_alt: dict,
+    utterances: list[dict] | None,
+    include_timestamps: bool,
+) -> str:
+    """Build the transcript section using utterances, paragraphs, or plain text."""
+    # Prefer utterances (diarized output)
+    if utterances:
+        lines = ["### Transcript", ""]
+        for utt in utterances:
+            speaker = utt.get("speaker", "?")
+            text = utt.get("transcript", "").strip()
+            if include_timestamps:
+                start = format_timestamp(utt.get("start", 0))
+                end = format_timestamp(utt.get("end", 0))
+                lines.append(f"**Speaker {speaker}** ({start} - {end}): {text}")
+            else:
+                lines.append(f"**Speaker {speaker}**: {text}")
+            lines.append("")
+        return "\n".join(lines).rstrip()
+
+    # Fall back to paragraphs
+    paragraphs_data = first_alt.get("paragraphs")
+    if paragraphs_data and isinstance(paragraphs_data, dict):
+        paras = paragraphs_data.get("paragraphs") or []
+        if paras:
+            lines = ["### Transcript", ""]
+            for para in paras:
+                speaker = para.get("speaker")
+                sentences = para.get("sentences") or []
+                text = " ".join(s.get("text", "") for s in sentences).strip()
+                if not text:
+                    continue
+                if speaker is not None and include_timestamps:
+                    start = format_timestamp(para.get("start", 0))
+                    end = format_timestamp(para.get("end", 0))
+                    lines.append(
+                        f"**Speaker {speaker}** ({start} - {end}): {text}"
+                    )
+                elif speaker is not None:
+                    lines.append(f"**Speaker {speaker}**: {text}")
+                else:
+                    lines.append(text)
+                lines.append("")
+            return "\n".join(lines).rstrip()
+
+    # Fall back to plain transcript
+    transcript = first_alt.get("transcript", "").strip()
+    if transcript:
+        return f"### Transcript\n\n{transcript}"
+
+    return ""
+
+
+def _format_summaries(first_alt: dict) -> str:
+    """Build the summary section."""
+    summaries = first_alt.get("summaries")
+    if not summaries:
+        return ""
+    texts = [s.get("summary", "") for s in summaries if s.get("summary")]
+    if not texts:
+        return ""
+    return "### Summary\n\n" + "\n\n".join(texts)
+
+
+def _format_topics(first_alt: dict) -> str:
+    """Build the topics section."""
+    topics_data = first_alt.get("topics")
+    if not topics_data or not isinstance(topics_data, dict):
+        return ""
+    segments = topics_data.get("segments") or []
+    # Collect unique topics with their highest confidence
+    seen: dict[str, float] = {}
+    for seg in segments:
+        for t in seg.get("topics") or []:
+            topic = t.get("topic", "")
+            conf = t.get("confidence", 0)
+            if topic and (topic not in seen or conf > seen[topic]):
+                seen[topic] = conf
+    if not seen:
+        return ""
+    lines = ["### Topics"]
+    for topic, conf in sorted(seen.items(), key=lambda x: x[1], reverse=True):
+        lines.append(f"- **{topic}** ({conf * 100:.1f}%)")
+    return "\n".join(lines)
+
+
+def _format_entities(first_alt: dict) -> str:
+    """Build the entities table."""
+    entities_data = first_alt.get("entities")
+    if not entities_data or not isinstance(entities_data, dict):
+        return ""
+    segments = entities_data.get("segments") or []
+    rows: list[tuple[str, str, float]] = []
+    for seg in segments:
+        for ent in seg.get("entities") or []:
+            label = ent.get("label", "")
+            value = ent.get("value", "")
+            conf = ent.get("confidence", 0)
+            if label and value:
+                rows.append((label, value, conf))
+    if not rows:
+        return ""
+    lines = [
+        "### Entities",
+        "",
+        "| Type | Value | Confidence |",
+        "|------|-------|------------|",
+    ]
+    for label, value, conf in rows:
+        lines.append(f"| {label} | {value} | {conf * 100:.1f}% |")
+    return "\n".join(lines)
+
+
+def _format_sentiment(first_alt: dict) -> str:
+    """Build the sentiment section."""
+    sentiments_data = first_alt.get("sentiments")
+    if not sentiments_data or not isinstance(sentiments_data, dict):
+        return ""
+
+    lines = ["### Sentiment"]
+
+    average = sentiments_data.get("average")
+    if average and isinstance(average, dict):
+        sentiment = average.get("sentiment", "")
+        score = average.get("sentiment_score")
+        if sentiment and score is not None:
+            lines.append(f"\n**Overall:** {sentiment.capitalize()} ({score:.2f})")
+
+    segments = sentiments_data.get("segments") or []
+    if segments:
+        lines.append("")
+        lines.append("| Segment | Sentiment | Score |")
+        lines.append("|---------|-----------|-------|")
+        for seg in segments:
+            text = seg.get("text", "").strip()
+            sentiment = seg.get("sentiment", "")
+            score = seg.get("sentiment_score")
+            if text and sentiment and score is not None:
+                # Truncate long segment text for table readability
+                display = text if len(text) <= 60 else text[:57] + "..."
+                lines.append(
+                    f'| "{display}" | {sentiment.capitalize()} | {score:.2f} |'
+                )
+
+    if len(lines) <= 1:
+        return ""
+    return "\n".join(lines)
+
+
+def _format_intents(first_alt: dict) -> str:
+    """Build the intents section."""
+    intents_data = first_alt.get("intents")
+    if not intents_data or not isinstance(intents_data, dict):
+        return ""
+    segments = intents_data.get("segments") or []
+    # Collect unique intents with highest confidence
+    seen: dict[str, float] = {}
+    for seg in segments:
+        for intent in seg.get("intents") or []:
+            name = intent.get("intent", "")
+            conf = intent.get("confidence", 0)
+            if name and (name not in seen or conf > seen[name]):
+                seen[name] = conf
+    if not seen:
+        return ""
+    lines = ["### Intents"]
+    for name, conf in sorted(seen.items(), key=lambda x: x[1], reverse=True):
+        lines.append(f"- **{name}** ({conf * 100:.1f}%)")
+    return "\n".join(lines)
+
+
+def _format_search(first_alt: dict) -> str:
+    """Build the search results section with timestamps."""
+    search_data = first_alt.get("search")
+    if not search_data:
+        return ""
+
+    lines = ["### Search Results"]
+    for group in search_data:
+        query = group.get("query", "")
+        hits = group.get("hits") or []
+        lines.append(f"\n**\"{query}\"**")
+        if not hits:
+            lines.append("No matches found.")
+            continue
+        for hit in hits:
+            snippet = hit.get("snippet", "")
+            start = hit.get("start", 0)
+            end = hit.get("end", 0)
+            conf = hit.get("confidence", 0)
+            lines.append(
+                f"- ({format_timestamp(start)} - {format_timestamp(end)}) "
+                f"*{snippet}* ({conf * 100:.1f}%)"
+            )
+
+    if len(lines) <= 1:
+        return ""
+    return "\n".join(lines)
--- a/deepgram-mcp/src/deepgram_mcp/server.py
+++ b/deepgram-mcp/src/deepgram_mcp/server.py
@@ -0,0 +1,461 @@
+"""Deepgram MCP Server — FastMCP 2.x with custom HTTP routes."""
+
+import asyncio
+import os
+from pathlib import Path
+
+import aiofiles
+from dotenv import load_dotenv
+from fastmcp import FastMCP
+from starlette.requests import Request
+from starlette.responses import FileResponse, JSONResponse, Response
+
+from deepgram_mcp import file_manager, formatter, transcription, tts
+
+load_dotenv()
+
+mcp = FastMCP("Deepgram MCP")
+
+# ---------------------------------------------------------------------------
+# Shared transcription parameter docstring
+# ---------------------------------------------------------------------------
+_TRANSCRIBE_PARAMS_DOC = """
+Parameters:
+  model: Deepgram model (nova-3, nova-2, enhanced, base, whisper-large). Default: nova-3
+  language: BCP-47 language code (e.g. en, es, fr). Omit for auto-detect.
+  detect_language: Auto-detect language (bool).
+  smart_format: Enable smart formatting (bool, default True).
+  punctuate: Add punctuation (bool).
+  paragraphs: Split into paragraphs (bool).
+  numerals: Convert numbers to digits (bool).
+  measurements: Format measurements (bool).
+  dictation: Dictation mode with spoken punctuation (bool).
+  diarize: Speaker diarization (bool, default True).
+  utterances: Return utterances (bool).
+  utt_split: Pause threshold in seconds for utterance splitting (float).
+  summarize: Generate summary (bool).
+  topics: Detect topics (bool).
+  sentiment: Analyze sentiment (bool).
+  entities: Detect entities (bool).
+  intents: Detect intents (bool).
+  custom_topics: Comma-separated custom topics (up to 100).
+  custom_intents: Comma-separated custom intents.
+  keywords: Comma-separated "term:boost" pairs for keyword boosting.
+  keyterm: Prompting term for Nova-3.
+  search: Comma-separated terms to search for in audio.
+  redact: Comma-separated redaction types (pci, pii, numbers).
+  profanity_filter: Filter profanity (bool).
+  replace: Comma-separated "find:replace" pairs.
+  filler_words: Transcribe filler words like um, uh (bool).
+  multichannel: Treat each channel independently (bool).
+  encoding: Audio encoding (linear16, flac, mulaw, opus, etc.).
+  sample_rate: Audio sample rate in Hz.
+"""
+
+
+def _collect_options(**kwargs) -> dict:
+    """Filter out None values from tool kwargs to build options dict."""
+    return {k: v for k, v in kwargs.items() if v is not None}
+
+
+async def _do_transcribe(source, **kwargs) -> str:
+    """Run transcription, format result, handle truncation."""
+    options = _collect_options(**kwargs)
+    result = await transcription.transcribe(source, options)
+    text = formatter.format_transcription(result)
+    text, was_truncated = formatter.truncate_result(text)
+    if was_truncated:
+        # Save full transcript to file
+        full_text = formatter.format_transcription(result)
+        save_path = file_manager.TTS_DIR / "full_transcript.md"
+        async with aiofiles.open(save_path, "w") as f:
+            await f.write(full_text)
+        text += f"\n\nFull transcript saved to: {save_path}"
+    return text
+
+
+# ---------------------------------------------------------------------------
+# Transcription tools
+# ---------------------------------------------------------------------------
+
+
+@mcp.tool(description="Transcribe audio from a file path on the NUC server." + _TRANSCRIBE_PARAMS_DOC)
+async def transcribe_file(
+    path: str,
+    model: str = "nova-3",
+    language: str | None = None,
+    detect_language: bool | None = None,
+    smart_format: bool = True,
+    punctuate: bool | None = None,
+    paragraphs: bool | None = None,
+    numerals: bool | None = None,
+    measurements: bool | None = None,
+    dictation: bool | None = None,
+    diarize: bool = True,
+    utterances: bool | None = None,
+    utt_split: float | None = None,
+    summarize: bool | None = None,
+    topics: bool | None = None,
+    sentiment: bool | None = None,
+    entities: bool | None = None,
+    intents: bool | None = None,
+    custom_topics: str | None = None,
+    custom_intents: str | None = None,
+    keywords: str | None = None,
+    keyterm: str | None = None,
+    search: str | None = None,
+    redact: str | None = None,
+    profanity_filter: bool | None = None,
+    replace: str | None = None,
+    filler_words: bool | None = None,
+    multichannel: bool | None = None,
+    encoding: str | None = None,
+    sample_rate: int | None = None,
+) -> str:
+    """Transcribe an audio file from a filesystem path on the NUC."""
+    file_path = Path(path)
+    if not file_path.is_file():
+        return f"Error: File not found: {path}"
+    return await _do_transcribe(
+        file_path,
+        model=model, language=language, detect_language=detect_language,
+        smart_format=smart_format, punctuate=punctuate, paragraphs=paragraphs,
+        numerals=numerals, measurements=measurements, dictation=dictation,
+        diarize=diarize, utterances=utterances, utt_split=utt_split,
+        summarize=summarize, topics=topics, sentiment=sentiment,
+        entities=entities, intents=intents,
+        custom_topics=custom_topics, custom_intents=custom_intents,
+        keywords=keywords, keyterm=keyterm, search=search,
+        redact=redact, profanity_filter=profanity_filter, replace=replace,
+        filler_words=filler_words, multichannel=multichannel,
+        encoding=encoding, sample_rate=sample_rate,
+    )
+
+
+@mcp.tool(description="Transcribe audio from a public URL." + _TRANSCRIBE_PARAMS_DOC)
+async def transcribe_url(
+    url: str,
+    model: str = "nova-3",
+    language: str | None = None,
+    detect_language: bool | None = None,
+    smart_format: bool = True,
+    punctuate: bool | None = None,
+    paragraphs: bool | None = None,
+    numerals: bool | None = None,
+    measurements: bool | None = None,
+    dictation: bool | None = None,
+    diarize: bool = True,
+    utterances: bool | None = None,
+    utt_split: float | None = None,
+    summarize: bool | None = None,
+    topics: bool | None = None,
+    sentiment: bool | None = None,
+    entities: bool | None = None,
+    intents: bool | None = None,
+    custom_topics: str | None = None,
+    custom_intents: str | None = None,
+    keywords: str | None = None,
+    keyterm: str | None = None,
+    search: str | None = None,
+    redact: str | None = None,
+    profanity_filter: bool | None = None,
+    replace: str | None = None,
+    filler_words: bool | None = None,
+    multichannel: bool | None = None,
+    encoding: str | None = None,
+    sample_rate: int | None = None,
+) -> str:
+    """Transcribe audio from a publicly accessible URL."""
+    if not url.startswith(("http://", "https://")):
+        return "Error: URL must start with http:// or https://"
+    return await _do_transcribe(
+        url,
+        model=model, language=language, detect_language=detect_language,
+        smart_format=smart_format, punctuate=punctuate, paragraphs=paragraphs,
+        numerals=numerals, measurements=measurements, dictation=dictation,
+        diarize=diarize, utterances=utterances, utt_split=utt_split,
+        summarize=summarize, topics=topics, sentiment=sentiment,
+        entities=entities, intents=intents,
+        custom_topics=custom_topics, custom_intents=custom_intents,
+        keywords=keywords, keyterm=keyterm, search=search,
+        redact=redact, profanity_filter=profanity_filter, replace=replace,
+        filler_words=filler_words, multichannel=multichannel,
+        encoding=encoding, sample_rate=sample_rate,
+    )
+
+
+@mcp.tool(description="Transcribe a previously uploaded audio file." + _TRANSCRIBE_PARAMS_DOC)
+async def transcribe_uploaded(
+    filename: str,
+    model: str = "nova-3",
+    language: str | None = None,
+    detect_language: bool | None = None,
+    smart_format: bool = True,
+    punctuate: bool | None = None,
+    paragraphs: bool | None = None,
+    numerals: bool | None = None,
+    measurements: bool | None = None,
+    dictation: bool | None = None,
+    diarize: bool = True,
+    utterances: bool | None = None,
+    utt_split: float | None = None,
+    summarize: bool | None = None,
+    topics: bool | None = None,
+    sentiment: bool | None = None,
+    entities: bool | None = None,
+    intents: bool | None = None,
+    custom_topics: str | None = None,
+    custom_intents: str | None = None,
+    keywords: str | None = None,
+    keyterm: str | None = None,
+    search: str | None = None,
+    redact: str | None = None,
+    profanity_filter: bool | None = None,
+    replace: str | None = None,
+    filler_words: bool | None = None,
+    multichannel: bool | None = None,
+    encoding: str | None = None,
+    sample_rate: int | None = None,
+) -> str:
+    """Transcribe a file that was uploaded via the /upload endpoint."""
+    file_path = file_manager.get_file_path(file_manager.UPLOAD_DIR, filename)
+    if file_path is None:
+        return f"Error: Uploaded file not found: {filename}"
+    return await _do_transcribe(
+        file_path,
+        model=model, language=language, detect_language=detect_language,
+        smart_format=smart_format, punctuate=punctuate, paragraphs=paragraphs,
+        numerals=numerals, measurements=measurements, dictation=dictation,
+        diarize=diarize, utterances=utterances, utt_split=utt_split,
+        summarize=summarize, topics=topics, sentiment=sentiment,
+        entities=entities, intents=intents,
+        custom_topics=custom_topics, custom_intents=custom_intents,
+        keywords=keywords, keyterm=keyterm, search=search,
+        redact=redact, profanity_filter=profanity_filter, replace=replace,
+        filler_words=filler_words, multichannel=multichannel,
+        encoding=encoding, sample_rate=sample_rate,
+    )
+
+
+# ---------------------------------------------------------------------------
+# TTS tools
+# ---------------------------------------------------------------------------
+
+
+@mcp.tool(description="Convert text to speech using Deepgram Aura-2 voices. Returns download URL for the generated audio file.")
+async def text_to_speech(
+    text: str,
+    model: str = "aura-2-asteria-en",
+    encoding: str = "mp3",
+    sample_rate: int = 24000,
+    container: str | None = None,
+) -> str:
+    """Generate speech audio from text."""
+    audio_bytes, filename = await tts.text_to_speech(
+        text, model=model, encoding=encoding,
+        sample_rate=sample_rate, container=container,
+    )
+    save_path = file_manager.TTS_DIR / filename
+    async with aiofiles.open(save_path, "wb") as f:
+        await f.write(audio_bytes)
+    size_mb = round(len(audio_bytes) / (1024 * 1024), 2)
+    host = os.getenv("HOST", "0.0.0.0")
+    port = os.getenv("PORT", "8009")
+    download_url = f"http://192.168.1.3:{port}/files/{filename}"
+    return (
+        f"Audio generated successfully.\n"
+        f"- **File:** {filename}\n"
+        f"- **Size:** {size_mb} MB\n"
+        f"- **Model:** {model}\n"
+        f"- **Encoding:** {encoding}\n"
+        f"- **Download:** {download_url}"
+    )
+
+
+@mcp.tool(description="List available Deepgram Aura-2 TTS voices. Optionally filter by language code (en, es, de, fr, nl, it, ja).")
+async def list_tts_voices(language: str | None = None) -> str:
+    """List available TTS voices."""
+    voices = tts.list_voices(language)
+    if not voices:
+        return f"No voices found for language: {language}"
+    lines = [f"## Available TTS Voices ({len(voices)} total)\n"]
+    current_lang = None
+    for v in voices:
+        if v["language"] != current_lang:
+            current_lang = v["language"]
+            lines.append(f"\n### {current_lang.upper()}")
+        gender_icon = "F" if v["gender"] == "female" else "M"
+        lines.append(f"- `{v['id']}` — {v['name']} ({gender_icon}) — {v['description']}")
+    return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# File management tools
+# ---------------------------------------------------------------------------
+
+
+@mcp.tool(description="List files in the upload directory.")
+async def list_uploaded_files() -> str:
+    """List all uploaded audio files."""
+    files = file_manager.list_files(file_manager.UPLOAD_DIR)
+    if not files:
+        return "No uploaded files found."
+    lines = ["## Uploaded Files\n"]
+    lines.append("| File | Size (MB) | Modified |")
+    lines.append("|------|-----------|----------|")
+    for f in files:
+        lines.append(f"| {f['name']} | {f['size_mb']} | {f['modified']} |")
+    return "\n".join(lines)
+
+
+@mcp.tool(description="List generated TTS audio files.")
+async def list_generated_files() -> str:
+    """List all generated TTS output files."""
+    files = file_manager.list_files(file_manager.TTS_DIR)
+    if not files:
+        return "No generated files found."
+    port = os.getenv("PORT", "8009")
+    lines = ["## Generated Files\n"]
+    lines.append("| File | Size (MB) | Download URL |")
+    lines.append("|------|-----------|-------------|")
+    for f in files:
+        url = f"http://192.168.1.3:{port}/files/{f['name']}"
+        lines.append(f"| {f['name']} | {f['size_mb']} | {url} |")
+    return "\n".join(lines)
+
+
+@mcp.tool(description="Get upload endpoint URL and example curl command for uploading audio files.")
+async def get_upload_info() -> str:
+    """Return upload endpoint info and usage example."""
+    port = os.getenv("PORT", "8009")
+    return (
+        f"## File Upload\n\n"
+        f"**Endpoint:** `POST http://192.168.1.3:{port}/upload`\n\n"
+        f"**Example:**\n```bash\n"
+        f"curl -X POST http://192.168.1.3:{port}/upload -F \"file=@recording.m4a\"\n"
+        f"```\n\n"
+        f"Then use `transcribe_uploaded(filename=\"...\")` with the returned filename."
+    )
+
+
+@mcp.tool(description="Delete an uploaded or generated file. file_type: 'upload' or 'generated'.")
+async def delete_file(filename: str, file_type: str = "upload") -> str:
+    """Delete a file from uploads or generated directory."""
+    directory = file_manager.UPLOAD_DIR if file_type == "upload" else file_manager.TTS_DIR
+    success = file_manager.delete_file(directory, filename)
+    if success:
+        return f"Deleted: {filename}"
+    return f"File not found or could not be deleted: {filename}"
+
+
+# ---------------------------------------------------------------------------
+# Utility tools
+# ---------------------------------------------------------------------------
+
+
+@mcp.tool(description="Convert audio format or sample rate using ffmpeg. Useful for preprocessing before transcription.")
+async def convert_audio(
+    input_path: str,
+    output_format: str = "wav",
+    sample_rate: int | None = None,
+) -> str:
+    """Convert audio file to a different format or sample rate."""
+    src = Path(input_path)
+    if not src.is_file():
+        return f"Error: Input file not found: {input_path}"
+
+    stem = src.stem
+    dest = file_manager.UPLOAD_DIR / f"{stem}_converted.{output_format}"
+
+    cmd = ["ffmpeg", "-i", str(src), "-y"]
+    if sample_rate:
+        cmd.extend(["-ar", str(sample_rate)])
+    cmd.append(str(dest))
+
+    proc = await asyncio.create_subprocess_exec(
+        *cmd,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    _, stderr = await proc.communicate()
+
+    if proc.returncode != 0:
+        return f"Conversion failed: {stderr.decode().strip()}"
+
+    size_mb = round(dest.stat().st_size / (1024 * 1024), 2)
+    return (
+        f"Converted successfully.\n"
+        f"- **Output:** {dest}\n"
+        f"- **Format:** {output_format}\n"
+        f"- **Size:** {size_mb} MB"
+    )
+
+
+@mcp.tool(description="Verify Deepgram API key and check account/project info.")
+async def check_api_status() -> str:
+    """Check if the Deepgram API key is valid."""
+    status = await transcription.check_api_status()
+    if status["valid"]:
+        projects = status.get("projects", [])
+        lines = ["## Deepgram API Status: Valid\n"]
+        if projects:
+            lines.append("### Projects")
+            for p in projects:
+                lines.append(f"- **{p['name']}** (`{p['id']}`)")
+        return "\n".join(lines)
+    return f"## Deepgram API Status: Invalid\n\nError: {status.get('error', 'Unknown')}"
+
+
+# ---------------------------------------------------------------------------
+# Custom HTTP endpoints (FastMCP custom_route)
+# ---------------------------------------------------------------------------
+
+
+@mcp.custom_route("/health", methods=["GET"])
+async def health_endpoint(request: Request) -> Response:
+    """Health check endpoint for Docker."""
+    return JSONResponse({"status": "ok", "service": "deepgram-mcp"})
+
+
+@mcp.custom_route("/upload", methods=["POST"])
+async def upload_endpoint(request: Request) -> Response:
+    """Multipart file upload — streams to disk."""
+    content_type = request.headers.get("content-type", "")
+    if "multipart/form-data" not in content_type:
+        return JSONResponse(
+            {"error": "Content-Type must be multipart/form-data"},
+            status_code=400,
+        )
+
+    form = await request.form()
+    upload = form.get("file")
+    if upload is None:
+        return JSONResponse({"error": "No 'file' field in form data"}, status_code=400)
+
+    content = await upload.read()
+    result = await file_manager.save_upload(upload.filename or "upload", content)
+    return JSONResponse(result)
+
+
+@mcp.custom_route("/files/{name:path}", methods=["GET"])
+async def files_endpoint(request: Request) -> Response:
+    """Serve generated TTS files for download."""
+    name = request.path_params["name"]
+    file_path = file_manager.get_file_path(file_manager.TTS_DIR, name)
+    if file_path is None:
+        return JSONResponse({"error": "File not found"}, status_code=404)
+    return FileResponse(str(file_path), filename=name)
+
+
+# ---------------------------------------------------------------------------
+# Run server
+# ---------------------------------------------------------------------------
+
+if __name__ == "__main__":
+    host = os.getenv("HOST", "0.0.0.0")
+    port = int(os.getenv("PORT", "8009"))
+    mcp.run(
+        transport="http",
+        host=host,
+        port=port,
+    )
--- a/deepgram-mcp/src/deepgram_mcp/splitter.py
+++ b/deepgram-mcp/src/deepgram_mcp/splitter.py
@@ -0,0 +1,230 @@
+"""FFmpeg-based audio splitting for files exceeding the Deepgram size limit."""
+
+import asyncio
+import json
+import shutil
+import tempfile
+from pathlib import Path
+
+
+async def get_audio_duration(file_path: Path) -> float:
+    """Get audio duration in seconds using ffprobe."""
+    proc = await asyncio.create_subprocess_exec(
+        "ffprobe",
+        "-v", "quiet",
+        "-print_format", "json",
+        "-show_format",
+        str(file_path),
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    stdout, stderr = await proc.communicate()
+
+    if proc.returncode != 0:
+        raise RuntimeError(
+            f"ffprobe failed (exit {proc.returncode}): {stderr.decode().strip()}"
+        )
+
+    info = json.loads(stdout)
+    return float(info["format"]["duration"])
+
+
+def get_file_size_mb(file_path: Path) -> float:
+    """Return the file size in megabytes."""
+    return file_path.stat().st_size / (1024 * 1024)
+
+
+async def split_audio(
+    file_path: Path,
+    max_chunk_mb: int = 1500,
+) -> list[Path]:
+    """Split an audio file into chunks of approximately max_chunk_mb each.
+
+    Uses ffmpeg's segment muxer with stream copy (no re-encoding).
+    If the file is already under the limit, returns [file_path] unchanged.
+    """
+    size_mb = get_file_size_mb(file_path)
+    if size_mb <= max_chunk_mb:
+        return [file_path]
+
+    duration = await get_audio_duration(file_path)
+    if duration <= 0:
+        raise ValueError(f"Invalid audio duration: {duration}s")
+
+    # Calculate segment time so each chunk is ~max_chunk_mb
+    segment_time = int(duration * max_chunk_mb / size_mb)
+    if segment_time < 1:
+        segment_time = 1
+
+    tmp_dir = Path(tempfile.mkdtemp(prefix="deepgram_chunks_"))
+    ext = file_path.suffix or ".wav"
+    pattern = str(tmp_dir / f"chunk_%03d{ext}")
+
+    proc = await asyncio.create_subprocess_exec(
+        "ffmpeg",
+        "-i", str(file_path),
+        "-f", "segment",
+        "-segment_time", str(segment_time),
+        "-c", "copy",
+        "-v", "warning",
+        pattern,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    _, stderr = await proc.communicate()
+
+    if proc.returncode != 0:
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+        raise RuntimeError(
+            f"ffmpeg split failed (exit {proc.returncode}): {stderr.decode().strip()}"
+        )
+
+    chunks = sorted(tmp_dir.glob(f"chunk_*{ext}"))
+    if not chunks:
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+        raise RuntimeError("ffmpeg produced no output chunks")
+
+    return chunks
+
+
+def merge_transcription_results(
+    results: list[dict],
+    chunk_durations: list[float],
+) -> dict:
+    """Merge multiple Deepgram transcription responses into a single result.
+
+    Adjusts all timestamps by cumulative offset so chunks stitch together
+    correctly in the final timeline.
+    """
+    if not results:
+        return {}
+    if len(results) == 1:
+        return results[0]
+
+    # Compute cumulative time offsets for each chunk
+    offsets = [0.0]
+    for dur in chunk_durations[:-1]:
+        offsets.append(offsets[-1] + dur)
+
+    merged_transcript_parts: list[str] = []
+    merged_words: list[dict] = []
+    merged_paragraphs: list[dict] = []
+    merged_utterances: list[dict] = []
+    merged_topics: list[dict] = []
+    merged_entities: list[dict] = []
+    merged_summaries: list[dict] = []
+    merged_sentiments: list[dict] = []
+
+    # Keep metadata from the first result as the base
+    base = results[0].copy()
+
+    for idx, result in enumerate(results):
+        offset = offsets[idx]
+
+        # Extract channel transcript data
+        channels = (
+            result.get("results", {}).get("channels", [])
+        )
+        if channels:
+            alt = channels[0].get("alternatives", [{}])[0]
+            transcript = alt.get("transcript", "")
+            if transcript:
+                merged_transcript_parts.append(transcript)
+
+            for word in alt.get("words", []):
+                adjusted = word.copy()
+                adjusted["start"] = round(word.get("start", 0) + offset, 3)
+                adjusted["end"] = round(word.get("end", 0) + offset, 3)
+                merged_words.append(adjusted)
+
+            for para in alt.get("paragraphs", {}).get("paragraphs", []):
+                adjusted = para.copy()
+                adjusted["start"] = round(para.get("start", 0) + offset, 3)
+                adjusted["end"] = round(para.get("end", 0) + offset, 3)
+                if "sentences" in adjusted:
+                    adjusted["sentences"] = [
+                        {
+                            **s,
+                            "start": round(s.get("start", 0) + offset, 3),
+                            "end": round(s.get("end", 0) + offset, 3),
+                        }
+                        for s in adjusted["sentences"]
+                    ]
+                merged_paragraphs.append(adjusted)
+
+        # Utterances (diarization)
+        for utt in result.get("results", {}).get("utterances", []):
+            adjusted = utt.copy()
+            adjusted["start"] = round(utt.get("start", 0) + offset, 3)
+            adjusted["end"] = round(utt.get("end", 0) + offset, 3)
+            if "words" in adjusted:
+                adjusted["words"] = [
+                    {
+                        **w,
+                        "start": round(w.get("start", 0) + offset, 3),
+                        "end": round(w.get("end", 0) + offset, 3),
+                    }
+                    for w in adjusted["words"]
+                ]
+            merged_utterances.append(adjusted)
+
+        # Topics, entities, summaries, sentiments -- concatenate lists
+        res = result.get("results", {})
+        merged_topics.extend(res.get("topics", {}).get("segments", []))
+        merged_entities.extend(res.get("entities", {}).get("segments", []))
+        merged_summaries.extend(
+            res.get("summary", {}).get("results", [])
+            or res.get("summaries", [])
+        )
+        merged_sentiments.extend(
+            res.get("sentiments", {}).get("segments", [])
+        )
+
+    # Assemble merged output
+    if "results" not in base:
+        base["results"] = {}
+
+    merged_results = base["results"]
+
+    # Rebuild channels
+    if merged_results.get("channels"):
+        channel = merged_results["channels"][0]
+        alt = channel.get("alternatives", [{}])[0]
+        alt["transcript"] = " ".join(merged_transcript_parts)
+        alt["words"] = merged_words
+        if merged_paragraphs:
+            alt["paragraphs"] = {"paragraphs": merged_paragraphs}
+        channel["alternatives"] = [alt]
+        merged_results["channels"] = [channel]
+
+    if merged_utterances:
+        merged_results["utterances"] = merged_utterances
+    if merged_topics:
+        merged_results.setdefault("topics", {})["segments"] = merged_topics
+    if merged_entities:
+        merged_results.setdefault("entities", {})["segments"] = merged_entities
+    if merged_summaries:
+        merged_results["summaries"] = merged_summaries
+    if merged_sentiments:
+        merged_results.setdefault("sentiments", {})["segments"] = merged_sentiments
+
+    return base
+
+
+def cleanup_chunks(chunk_paths: list[Path]) -> None:
+    """Delete temporary chunk files and their parent directory if it's a temp dir."""
+    if not chunk_paths:
+        return
+
+    parent = chunk_paths[0].parent
+
+    for path in chunk_paths:
+        try:
+            if path.is_file():
+                path.unlink()
+        except OSError:
+            pass
+
+    # Remove the temp directory if it's empty and looks like our temp dir
+    if parent.name.startswith("deepgram_chunks_"):
+        shutil.rmtree(parent, ignore_errors=True)
--- a/deepgram-mcp/src/deepgram_mcp/transcription.py
+++ b/deepgram-mcp/src/deepgram_mcp/transcription.py
@@ -0,0 +1,211 @@
+"""Speech-to-text transcription via Deepgram REST API (httpx)."""
+
+import os
+from pathlib import Path
+from typing import Union
+
+import httpx
+
+DEEPGRAM_API_URL = "https://api.deepgram.com/v1/listen"
+
+MIME_TYPES: dict[str, str] = {
+    ".mp3": "audio/mpeg",
+    ".wav": "audio/wav",
+    ".m4a": "audio/mp4",
+    ".flac": "audio/flac",
+    ".ogg": "audio/ogg",
+    ".webm": "audio/webm",
+    ".wma": "audio/x-ms-wma",
+    ".aac": "audio/aac",
+    ".mp4": "video/mp4",
+}
+
+MAX_FILE_SIZE_MB = 2000
+
+
+def _get_api_key() -> str:
+    key = os.getenv("DEEPGRAM_API_KEY", "")
+    if not key:
+        raise ValueError("DEEPGRAM_API_KEY environment variable is not set")
+    return key
+
+
+def _get_mime_type(file_path: Path) -> str:
+    return MIME_TYPES.get(file_path.suffix.lower(), "application/octet-stream")
+
+
+def build_query_params(params: dict) -> dict:
+    """Build Deepgram API query parameters from tool kwargs.
+
+    Filters None values, maps comma-separated strings to repeated params,
+    and converts booleans to lowercase strings.
+    """
+    filtered = {k: v for k, v in params.items() if v is not None}
+    query: dict = {}
+
+    # Direct fields (string/number/bool)
+    direct_fields = [
+        "model", "version", "language", "detect_language",
+        "smart_format", "punctuate", "paragraphs", "numerals",
+        "measurements", "dictation",
+        "diarize", "utterances", "utt_split",
+        "summarize", "topics", "sentiment", "entities", "intents",
+        "profanity_filter", "filler_words",
+        "multichannel",
+        "encoding", "sample_rate",
+        "keyterm",
+    ]
+    for field in direct_fields:
+        if field in filtered:
+            val = filtered[field]
+            if isinstance(val, bool):
+                query[field] = str(val).lower()
+            else:
+                query[field] = val
+
+    # Default diarize to true
+    if "diarize" not in query:
+        query["diarize"] = "true"
+
+    # Comma-separated -> repeated query params
+    csv_fields = [
+        "custom_topics", "custom_intents", "search",
+        "redact", "replace", "keywords",
+    ]
+    for field in csv_fields:
+        if field in filtered:
+            val = filtered[field]
+            if isinstance(val, str):
+                items = [s.strip() for s in val.split(",") if s.strip()]
+            elif isinstance(val, list):
+                items = val
+            else:
+                continue
+            if items:
+                query[field] = items
+
+    return query
+
+
+async def transcribe(
+    source: Union[str, Path, bytes],
+    options: dict,
+) -> dict:
+    """Transcribe audio from a URL, file path, or raw bytes.
+
+    Returns the full Deepgram transcription response as a dict.
+    """
+    api_key = _get_api_key()
+    query_params = build_query_params(options)
+    headers = {"Authorization": f"Token {api_key}"}
+
+    # URL source
+    if isinstance(source, str) and source.startswith(("http://", "https://")):
+        headers["Content-Type"] = "application/json"
+        async with httpx.AsyncClient(timeout=600.0) as client:
+            resp = await client.post(
+                DEEPGRAM_API_URL,
+                params=query_params,
+                headers=headers,
+                json={"url": source},
+            )
+            resp.raise_for_status()
+            return resp.json()
+
+    # File path source
+    if isinstance(source, (str, Path)):
+        file_path = Path(source)
+        if not file_path.is_file():
+            raise FileNotFoundError(f"Audio file not found: {file_path}")
+
+        file_size_mb = file_path.stat().st_size / (1024 * 1024)
+
+        # Large file handling via chunked splitting
+        if file_size_mb > MAX_FILE_SIZE_MB:
+            return await _transcribe_large_file(file_path, query_params, headers)
+
+        data = file_path.read_bytes()
+        mime_type = _get_mime_type(file_path)
+        headers["Content-Type"] = mime_type
+        async with httpx.AsyncClient(timeout=600.0) as client:
+            resp = await client.post(
+                DEEPGRAM_API_URL,
+                params=query_params,
+                headers=headers,
+                content=data,
+            )
+            resp.raise_for_status()
+            return resp.json()
+
+    # Raw bytes source
+    if isinstance(source, bytes):
+        headers["Content-Type"] = "application/octet-stream"
+        async with httpx.AsyncClient(timeout=600.0) as client:
+            resp = await client.post(
+                DEEPGRAM_API_URL,
+                params=query_params,
+                headers=headers,
+                content=source,
+            )
+            resp.raise_for_status()
+            return resp.json()
+
+    raise TypeError(f"Unsupported source type: {type(source)}")
+
+
+async def _transcribe_large_file(
+    file_path: Path, query_params: dict, headers: dict
+) -> dict:
+    """Split a large file into chunks, transcribe each, and merge results."""
+    from . import splitter
+
+    chunks = await splitter.split_audio(file_path)
+    try:
+        api_key = _get_api_key()
+        results = []
+        chunk_durations = []
+        for chunk in chunks:
+            data = chunk.read_bytes()
+            mime_type = _get_mime_type(chunk)
+            chunk_headers = {
+                **headers,
+                "Content-Type": mime_type,
+            }
+            async with httpx.AsyncClient(timeout=600.0) as client:
+                resp = await client.post(
+                    DEEPGRAM_API_URL,
+                    params=query_params,
+                    headers=chunk_headers,
+                    content=data,
+                )
+                resp.raise_for_status()
+                result = resp.json()
+            results.append(result)
+            duration = (result.get("metadata") or {}).get("duration", 0.0)
+            chunk_durations.append(duration)
+        return splitter.merge_transcription_results(results, chunk_durations)
+    finally:
+        splitter.cleanup_chunks(chunks)
+
+
+async def check_api_status() -> dict:
+    """Verify the Deepgram API key by listing projects.
+
+    Returns dict with 'valid' (bool), 'projects' (list), and 'error' (str|None).
+    """
+    try:
+        api_key = _get_api_key()
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            resp = await client.get(
+                "https://api.deepgram.com/v1/projects",
+                headers={"Authorization": f"Token {api_key}"},
+            )
+            resp.raise_for_status()
+            data = resp.json()
+        projects = [
+            {"id": p.get("project_id", ""), "name": p.get("name", "")}
+            for p in data.get("projects", [])
+        ]
+        return {"valid": True, "projects": projects, "error": None}
+    except Exception as exc:
+        return {"valid": False, "projects": [], "error": str(exc)}
--- a/deepgram-mcp/src/deepgram_mcp/tts.py
+++ b/deepgram-mcp/src/deepgram_mcp/tts.py
@@ -0,0 +1,197 @@
+"""Deepgram Text-to-Speech wrapper using Aura-2 voices (httpx REST API)."""
+
+from __future__ import annotations
+
+import os
+import time
+
+import httpx
+
+DEEPGRAM_TTS_URL = "https://api.deepgram.com/v1/speak"
+
+ENCODING_TO_EXT: dict[str, str] = {
+    "mp3": "mp3",
+    "linear16": "wav",
+    "wav": "wav",
+    "flac": "flac",
+    "opus": "opus",
+    "aac": "aac",
+    "mulaw": "wav",
+}
+
+# Real Deepgram Aura-2 voice IDs (format: aura-2-{name}-{lang})
+VOICES: list[dict[str, str]] = [
+    # English (US) - Feminine
+    {"id": "aura-2-asteria-en", "name": "Asteria", "language": "en", "locale": "en-US", "gender": "female", "description": "Warm professional"},
+    {"id": "aura-2-luna-en", "name": "Luna", "language": "en", "locale": "en-US", "gender": "female", "description": "Soft gentle"},
+    {"id": "aura-2-athena-en", "name": "Athena", "language": "en", "locale": "en-US", "gender": "female", "description": "Authoritative"},
+    {"id": "aura-2-aurora-en", "name": "Aurora", "language": "en", "locale": "en-US", "gender": "female", "description": "Bright energetic"},
+    {"id": "aura-2-thalia-en", "name": "Thalia", "language": "en", "locale": "en-US", "gender": "female", "description": "Natural conversational"},
+    {"id": "aura-2-andromeda-en", "name": "Andromeda", "language": "en", "locale": "en-US", "gender": "female", "description": "Clear articulate"},
+    {"id": "aura-2-helena-en", "name": "Helena", "language": "en", "locale": "en-US", "gender": "female", "description": "Elegant polished"},
+    {"id": "aura-2-callista-en", "name": "Callista", "language": "en", "locale": "en-US", "gender": "female", "description": "Friendly upbeat"},
+    {"id": "aura-2-cora-en", "name": "Cora", "language": "en", "locale": "en-US", "gender": "female", "description": "Calm soothing"},
+    {"id": "aura-2-electra-en", "name": "Electra", "language": "en", "locale": "en-US", "gender": "female", "description": "Dynamic expressive"},
+    {"id": "aura-2-iris-en", "name": "Iris", "language": "en", "locale": "en-US", "gender": "female", "description": "Bright cheerful"},
+    {"id": "aura-2-juno-en", "name": "Juno", "language": "en", "locale": "en-US", "gender": "female", "description": "Confident mature"},
+    {"id": "aura-2-minerva-en", "name": "Minerva", "language": "en", "locale": "en-US", "gender": "female", "description": "Wise scholarly"},
+    {"id": "aura-2-ophelia-en", "name": "Ophelia", "language": "en", "locale": "en-US", "gender": "female", "description": "Dramatic expressive"},
+    {"id": "aura-2-phoebe-en", "name": "Phoebe", "language": "en", "locale": "en-US", "gender": "female", "description": "Youthful fresh"},
+    {"id": "aura-2-selene-en", "name": "Selene", "language": "en", "locale": "en-US", "gender": "female", "description": "Serene ethereal"},
+    {"id": "aura-2-vesta-en", "name": "Vesta", "language": "en", "locale": "en-US", "gender": "female", "description": "Warm nurturing"},
+    {"id": "aura-2-cordelia-en", "name": "Cordelia", "language": "en", "locale": "en-US", "gender": "female", "description": "Regal composed"},
+    {"id": "aura-2-delia-en", "name": "Delia", "language": "en", "locale": "en-US", "gender": "female", "description": "Light melodic"},
+    {"id": "aura-2-harmonia-en", "name": "Harmonia", "language": "en", "locale": "en-US", "gender": "female", "description": "Balanced harmonious"},
+    {"id": "aura-2-amalthea-en", "name": "Amalthea", "language": "en", "locale": "en-US", "gender": "female", "description": "Gentle nurturing"},
+    {"id": "aura-2-janus-en", "name": "Janus", "language": "en", "locale": "en-US", "gender": "female", "description": "Versatile adaptive"},
+    # English (US) - Masculine
+    {"id": "aura-2-orion-en", "name": "Orion", "language": "en", "locale": "en-US", "gender": "male", "description": "Deep resonant"},
+    {"id": "aura-2-arcas-en", "name": "Arcas", "language": "en", "locale": "en-US", "gender": "male", "description": "Youthful energetic"},
+    {"id": "aura-2-orpheus-en", "name": "Orpheus", "language": "en", "locale": "en-US", "gender": "male", "description": "Expressive poetic"},
+    {"id": "aura-2-zeus-en", "name": "Zeus", "language": "en", "locale": "en-US", "gender": "male", "description": "Commanding powerful"},
+    {"id": "aura-2-apollo-en", "name": "Apollo", "language": "en", "locale": "en-US", "gender": "male", "description": "Bright confident"},
+    {"id": "aura-2-atlas-en", "name": "Atlas", "language": "en", "locale": "en-US", "gender": "male", "description": "Strong steady"},
+    {"id": "aura-2-hermes-en", "name": "Hermes", "language": "en", "locale": "en-US", "gender": "male", "description": "Quick articulate"},
+    {"id": "aura-2-jupiter-en", "name": "Jupiter", "language": "en", "locale": "en-US", "gender": "male", "description": "Authoritative warm"},
+    {"id": "aura-2-mars-en", "name": "Mars", "language": "en", "locale": "en-US", "gender": "male", "description": "Bold assertive"},
+    {"id": "aura-2-neptune-en", "name": "Neptune", "language": "en", "locale": "en-US", "gender": "male", "description": "Calm deep"},
+    {"id": "aura-2-odysseus-en", "name": "Odysseus", "language": "en", "locale": "en-US", "gender": "male", "description": "Storyteller adventurous"},
+    {"id": "aura-2-pluto-en", "name": "Pluto", "language": "en", "locale": "en-US", "gender": "male", "description": "Dark mysterious"},
+    {"id": "aura-2-saturn-en", "name": "Saturn", "language": "en", "locale": "en-US", "gender": "male", "description": "Mature wise"},
+    {"id": "aura-2-aries-en", "name": "Aries", "language": "en", "locale": "en-US", "gender": "male", "description": "Energetic dynamic"},
+    # English (GB)
+    {"id": "aura-2-pandora-en", "name": "Pandora", "language": "en", "locale": "en-GB", "gender": "female", "description": "British female"},
+    {"id": "aura-2-draco-en", "name": "Draco", "language": "en", "locale": "en-GB", "gender": "male", "description": "British male"},
+    # English (AU)
+    {"id": "aura-2-theia-en", "name": "Theia", "language": "en", "locale": "en-AU", "gender": "female", "description": "Australian female"},
+    {"id": "aura-2-hyperion-en", "name": "Hyperion", "language": "en", "locale": "en-AU", "gender": "male", "description": "Australian male"},
+    # Spanish - Mexican
+    {"id": "aura-2-estrella-es", "name": "Estrella", "language": "es", "locale": "es-MX", "gender": "female", "description": "Mexican female"},
+    {"id": "aura-2-olivia-es", "name": "Olivia", "language": "es", "locale": "es-MX", "gender": "female", "description": "Mexican female warm"},
+    {"id": "aura-2-sirio-es", "name": "Sirio", "language": "es", "locale": "es-MX", "gender": "male", "description": "Mexican male"},
+    {"id": "aura-2-javier-es", "name": "Javier", "language": "es", "locale": "es-MX", "gender": "male", "description": "Mexican male warm"},
+    {"id": "aura-2-luciano-es", "name": "Luciano", "language": "es", "locale": "es-MX", "gender": "male", "description": "Mexican male expressive"},
+    {"id": "aura-2-valerio-es", "name": "Valerio", "language": "es", "locale": "es-MX", "gender": "male", "description": "Mexican male confident"},
+    # Spanish - Peninsular
+    {"id": "aura-2-carina-es", "name": "Carina", "language": "es", "locale": "es-ES", "gender": "female", "description": "Castilian female"},
+    {"id": "aura-2-diana-es", "name": "Diana", "language": "es", "locale": "es-ES", "gender": "female", "description": "Castilian female elegant"},
+    {"id": "aura-2-agustina-es", "name": "Agustina", "language": "es", "locale": "es-ES", "gender": "female", "description": "Castilian female classic"},
+    {"id": "aura-2-silvia-es", "name": "Silvia", "language": "es", "locale": "es-ES", "gender": "female", "description": "Castilian female bright"},
+    {"id": "aura-2-nestor-es", "name": "Nestor", "language": "es", "locale": "es-ES", "gender": "male", "description": "Castilian male"},
+    {"id": "aura-2-alvaro-es", "name": "Alvaro", "language": "es", "locale": "es-ES", "gender": "male", "description": "Castilian male confident"},
+    # Spanish - Colombian / Argentine / LatAm
+    {"id": "aura-2-celeste-es", "name": "Celeste", "language": "es", "locale": "es-CO", "gender": "female", "description": "Colombian female"},
+    {"id": "aura-2-gloria-es", "name": "Gloria", "language": "es", "locale": "es-CO", "gender": "female", "description": "Colombian female warm"},
+    {"id": "aura-2-antonia-es", "name": "Antonia", "language": "es", "locale": "es-AR", "gender": "female", "description": "Argentine female"},
+    {"id": "aura-2-aquila-es", "name": "Aquila", "language": "es", "locale": "es-419", "gender": "male", "description": "Latin American male"},
+    {"id": "aura-2-selena-es", "name": "Selena", "language": "es", "locale": "es-419", "gender": "female", "description": "Latin American female"},
+    # German
+    {"id": "aura-2-elara-de", "name": "Elara", "language": "de", "locale": "de-DE", "gender": "female", "description": "German female natural"},
+    {"id": "aura-2-aurelia-de", "name": "Aurelia", "language": "de", "locale": "de-DE", "gender": "female", "description": "German female elegant"},
+    {"id": "aura-2-lara-de", "name": "Lara", "language": "de", "locale": "de-DE", "gender": "female", "description": "German female youthful"},
+    {"id": "aura-2-kara-de", "name": "Kara", "language": "de", "locale": "de-DE", "gender": "female", "description": "German female confident"},
+    {"id": "aura-2-viktoria-de", "name": "Viktoria", "language": "de", "locale": "de-DE", "gender": "female", "description": "German female strong"},
+    {"id": "aura-2-julius-de", "name": "Julius", "language": "de", "locale": "de-DE", "gender": "male", "description": "German male professional"},
+    {"id": "aura-2-fabian-de", "name": "Fabian", "language": "de", "locale": "de-DE", "gender": "male", "description": "German male warm"},
+    # French
+    {"id": "aura-2-agathe-fr", "name": "Agathe", "language": "fr", "locale": "fr-FR", "gender": "female", "description": "French female"},
+    {"id": "aura-2-hector-fr", "name": "Hector", "language": "fr", "locale": "fr-FR", "gender": "male", "description": "French male"},
+    # Dutch
+    {"id": "aura-2-beatrix-nl", "name": "Beatrix", "language": "nl", "locale": "nl-NL", "gender": "female", "description": "Dutch female classic"},
+    {"id": "aura-2-daphne-nl", "name": "Daphne", "language": "nl", "locale": "nl-NL", "gender": "female", "description": "Dutch female natural"},
+    {"id": "aura-2-cornelia-nl", "name": "Cornelia", "language": "nl", "locale": "nl-NL", "gender": "female", "description": "Dutch female warm"},
+    {"id": "aura-2-hestia-nl", "name": "Hestia", "language": "nl", "locale": "nl-NL", "gender": "female", "description": "Dutch female gentle"},
+    {"id": "aura-2-rhea-nl", "name": "Rhea", "language": "nl", "locale": "nl-NL", "gender": "female", "description": "Dutch female bright"},
+    {"id": "aura-2-leda-nl", "name": "Leda", "language": "nl", "locale": "nl-NL", "gender": "female", "description": "Dutch female elegant"},
+    {"id": "aura-2-sander-nl", "name": "Sander", "language": "nl", "locale": "nl-NL", "gender": "male", "description": "Dutch male natural"},
+    {"id": "aura-2-lars-nl", "name": "Lars", "language": "nl", "locale": "nl-NL", "gender": "male", "description": "Dutch male confident"},
+    {"id": "aura-2-roman-nl", "name": "Roman", "language": "nl", "locale": "nl-NL", "gender": "male", "description": "Dutch male warm"},
+    # Italian
+    {"id": "aura-2-melia-it", "name": "Melia", "language": "it", "locale": "it-IT", "gender": "female", "description": "Italian female natural"},
+    {"id": "aura-2-maia-it", "name": "Maia", "language": "it", "locale": "it-IT", "gender": "female", "description": "Italian female warm"},
+    {"id": "aura-2-cinzia-it", "name": "Cinzia", "language": "it", "locale": "it-IT", "gender": "female", "description": "Italian female elegant"},
+    {"id": "aura-2-livia-it", "name": "Livia", "language": "it", "locale": "it-IT", "gender": "female", "description": "Italian female classic"},
+    {"id": "aura-2-demetra-it", "name": "Demetra", "language": "it", "locale": "it-IT", "gender": "female", "description": "Italian female strong"},
+    {"id": "aura-2-elio-it", "name": "Elio", "language": "it", "locale": "it-IT", "gender": "male", "description": "Italian male bright"},
+    {"id": "aura-2-flavio-it", "name": "Flavio", "language": "it", "locale": "it-IT", "gender": "male", "description": "Italian male warm"},
+    {"id": "aura-2-cesare-it", "name": "Cesare", "language": "it", "locale": "it-IT", "gender": "male", "description": "Italian male authoritative"},
+    {"id": "aura-2-perseo-it", "name": "Perseo", "language": "it", "locale": "it-IT", "gender": "male", "description": "Italian male dynamic"},
+    {"id": "aura-2-dionisio-it", "name": "Dionisio", "language": "it", "locale": "it-IT", "gender": "male", "description": "Italian male expressive"},
+    # Japanese
+    {"id": "aura-2-uzume-ja", "name": "Uzume", "language": "ja", "locale": "ja-JP", "gender": "female", "description": "Japanese female natural"},
+    {"id": "aura-2-izanami-ja", "name": "Izanami", "language": "ja", "locale": "ja-JP", "gender": "female", "description": "Japanese female elegant"},
+    {"id": "aura-2-ebisu-ja", "name": "Ebisu", "language": "ja", "locale": "ja-JP", "gender": "male", "description": "Japanese male warm"},
+    {"id": "aura-2-fujin-ja", "name": "Fujin", "language": "ja", "locale": "ja-JP", "gender": "male", "description": "Japanese male dynamic"},
+    {"id": "aura-2-ama-ja", "name": "Ama", "language": "ja", "locale": "ja-JP", "gender": "male", "description": "Japanese male natural"},
+]
+
+
+def list_voices(language: str | None = None) -> list[dict[str, str]]:
+    """Return voices, optionally filtered by language code.
+
+    The filter is case-insensitive and matches both short codes ("en")
+    and full locale codes ("en-US").
+    """
+    if language is None:
+        return sorted(VOICES, key=lambda v: (v["language"], v["name"]))
+
+    lang = language.lower()
+    filtered = [
+        v for v in VOICES
+        if v["language"].lower() == lang or v["locale"].lower() == lang
+    ]
+    return sorted(filtered, key=lambda v: (v["language"], v["name"]))
+
+
+def get_voice_info(voice_id: str) -> dict[str, str] | None:
+    """Return voice info dict for a given voice ID, or None if not found."""
+    for voice in VOICES:
+        if voice["id"] == voice_id:
+            return voice
+    return None
+
+
+async def text_to_speech(
+    text: str,
+    model: str = "aura-2-asteria-en",
+    encoding: str = "mp3",
+    sample_rate: int = 24000,
+    container: str | None = None,
+) -> tuple[bytes, str]:
+    """Convert text to speech using Deepgram Aura-2 REST API.
+
+    Returns a tuple of (audio_bytes, suggested_filename).
+    """
+    api_key = os.environ.get("DEEPGRAM_API_KEY", "")
+    if not api_key:
+        raise ValueError("DEEPGRAM_API_KEY environment variable is not set")
+
+    params: dict = {
+        "model": model,
+        "encoding": encoding,
+        "sample_rate": str(sample_rate),
+    }
+    if container is not None:
+        params["container"] = container
+
+    headers = {
+        "Authorization": f"Token {api_key}",
+        "Content-Type": "application/json",
+    }
+
+    async with httpx.AsyncClient(timeout=120.0) as client:
+        resp = await client.post(
+            DEEPGRAM_TTS_URL,
+            params=params,
+            headers=headers,
+            json={"text": text},
+        )
+        resp.raise_for_status()
+        audio_bytes = resp.content
+
+    ext = ENCODING_TO_EXT.get(encoding, encoding)
+    model_short = model.rsplit("-", 1)[-1]
+    timestamp = int(time.time())
+    filename = f"tts_{timestamp}_{model_short}.{ext}"
+
+    return audio_bytes, filename