Skip to content

Adapters API Reference

External integrations: audio recording, OpenAI APIs, clipboard.

Audio Recorder

shh.adapters.audio.recorder

AudioRecorder

Async context manager for recording audio from the microphone. Usage: `async with AudioRecorder(sample_rate=16000, max_duration=60) as recorder: ...` — while the context is open, recording is in progress.

Source code in shh/adapters/audio/recorder.py
class AudioRecorder:
    """
    Async context manager for recording audio from the microphone.
    Usage:
        async with AudioRecorder(sample_rate=16000, max_duration=60) as recorder:
            # Recording in progress
            await do_something()
    """

    # Default cap on recording length when no max_duration is given.
    MAX_RECORDING_DURATION = 300  # seconds

    def __init__(self, sample_rate: int = SAMPLE_RATE, max_duration: float | None = None) -> None:
        """
        Initialize the AudioRecorder.

        Args:
            sample_rate: Capture sample rate in Hz.
            max_duration: Maximum recording length in seconds; defaults to
                MAX_RECORDING_DURATION when None.
        """
        self._sample_rate = sample_rate
        # Bug fix: `max_duration or DEFAULT` silently replaced an explicit 0
        # with the 300 s default; fall back only when the caller passed None.
        self._max_duration = (
            self.MAX_RECORDING_DURATION if max_duration is None else max_duration
        )
        self._chunks: list[NDArray[np.float32]] = []
        self._stream: sd.InputStream | None = None
        self._start_time: float | None = None

    async def __aenter__(self) -> "AudioRecorder":
        """
        Start the audio recording stream.
        Returns:
            Self for use within the async context manager.
        Raises:
            AudioRecordingError: If the input stream cannot be created or started.
        """

        def callback(
            indata: NDArray[np.float32], frames: int, time_info: object, status: sd.CallbackFlags
        ) -> None:
            """
            Called by sounddevice for each captured audio block.
            Args:
                indata: The recorded audio data.
                frames: Number of frames.
                time_info: Time information.
                status: Status flags.
            """
            if status:
                logger.warning(f"Audio recording status: {status}")

            # Copy is required: sounddevice reuses the indata buffer.
            self._chunks.append(indata.copy())

        try:
            self._stream = sd.InputStream(
                samplerate=self._sample_rate,
                channels=1,
                dtype=np.float32,
                callback=callback,
            )
            # start() can block; run it off the event loop thread.
            await asyncio.to_thread(self._stream.start)
            self._start_time = time.time()

            return self
        except Exception as e:
            raise AudioRecordingError(f"Failed to start audio recording: {e}") from e

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: object,
    ) -> None:
        """
        Stop the audio recording stream and finalize the recording.
        Shutdown errors are logged, never raised.
        """
        if self._stream:
            try:
                await asyncio.to_thread(self._stream.stop)
                await asyncio.to_thread(self._stream.close)
            except Exception as e:
                logger.error(f"Error stopping audio stream: {e}")

    def get_audio(self) -> NDArray[np.float32]:
        """
        Retrieve the recorded audio data as a single NumPy array.
        Returns:
            A flat 1-D float32 array; empty if nothing was recorded.
        """
        if not self._chunks:
            return np.array([], dtype=np.float32)

        audio = np.concatenate(self._chunks, axis=0)
        return audio.flatten()

    def elapsed_time(self) -> float:
        """
        Get the elapsed recording time in seconds.
        Returns:
            Elapsed time in seconds (0.0 before recording has started).
        """
        if self._start_time is None:
            return 0.0
        return time.time() - self._start_time

    def is_max_duration_reached(self) -> bool:
        """
        Return True if the maximum recording duration has been reached or exceeded.
        """
        return self.elapsed_time() >= self._max_duration

__aenter__() async

Start the audio recording stream. Returns: Self for use within the async context manager.

Source code in shh/adapters/audio/recorder.py
async def __aenter__(self) -> "AudioRecorder":
    """
    Open the input stream and begin capturing audio.

    Returns:
        Self for use within the async context manager.
    """

    def _on_audio(
        indata: NDArray[np.float32], frames: int, time_info: object, status: sd.CallbackFlags
    ) -> None:
        """
        Invoked by sounddevice with each captured audio block.
        Args:
            indata: The recorded audio data.
            frames: Number of frames.
            time_info: Time information.
            status: Status flags.
        """
        if status:
            logger.warning(f"Audio recording status: {status}")
        # Copy the buffer — sounddevice reuses it between callbacks.
        self._chunks.append(indata.copy())

    try:
        stream = sd.InputStream(
            samplerate=self._sample_rate,
            channels=1,
            dtype=np.float32,
            callback=_on_audio,
        )
        self._stream = stream
        # Run the potentially blocking start() off the event loop thread.
        await asyncio.to_thread(stream.start)
        self._start_time = time.time()
    except Exception as e:
        raise AudioRecordingError(f"Failed to start audio recording: {e}") from e
    return self

__aexit__(exc_type, exc_value, traceback) async

Stop the audio recording stream and finalize the recording.

Source code in shh/adapters/audio/recorder.py
async def __aexit__(
    self,
    exc_type: type[BaseException] | None,
    exc_value: BaseException | None,
    traceback: object,
) -> None:
    """
    Stop and close the underlying input stream.
    Shutdown errors are logged rather than raised so cleanup never masks
    the original exception (if any).
    """
    stream = self._stream
    if not stream:
        return
    try:
        # stop()/close() may block; keep them off the event loop thread.
        await asyncio.to_thread(stream.stop)
        await asyncio.to_thread(stream.close)
    except Exception as err:
        logger.error(f"Error stopping audio stream: {err}")

__init__(sample_rate=SAMPLE_RATE, max_duration=None)

Initialize the AudioRecorder.

Source code in shh/adapters/audio/recorder.py
def __init__(self, sample_rate: int = SAMPLE_RATE, max_duration: float | None = None) -> None:
    """
    Initialize the AudioRecorder.

    Args:
        sample_rate: Capture sample rate in Hz.
        max_duration: Maximum recording length in seconds; falls back to
            MAX_RECORDING_DURATION when None.
    """
    self._sample_rate = sample_rate
    # Bug fix: `max_duration or DEFAULT` would discard an explicit 0; only
    # substitute the class default when the argument is actually None.
    self._max_duration = (
        self.MAX_RECORDING_DURATION if max_duration is None else max_duration
    )
    self._chunks: list[NDArray[np.float32]] = []
    self._stream: sd.InputStream | None = None
    self._start_time: float | None = None

elapsed_time()

Get the elapsed recording time in seconds. Returns: Elapsed time in seconds.

Source code in shh/adapters/audio/recorder.py
def elapsed_time(self) -> float:
    """
    Get the elapsed recording time in seconds.
    Returns:
        Seconds since recording started, or 0.0 if it has not started.
    """
    started = self._start_time
    return 0.0 if started is None else time.time() - started

get_audio()

Retrieve the recorded audio data as a single NumPy array. Returns: A NumPy array containing the recorded audio data.

Source code in shh/adapters/audio/recorder.py
def get_audio(self) -> NDArray[np.float32]:
    """
    Retrieve the recorded audio data as a single NumPy array.
    Returns:
        A flat 1-D float32 array of samples; empty if nothing was recorded.
    """
    chunks = self._chunks
    if not chunks:
        # Nothing captured yet — return an empty float32 array.
        return np.empty(0, dtype=np.float32)
    combined = np.concatenate(chunks, axis=0)
    return combined.flatten()

is_max_duration_reached()

Return True if the maximum recording duration has been reached or exceeded.

Source code in shh/adapters/audio/recorder.py
def is_max_duration_reached(self) -> bool:
    """
    Return True once the elapsed time meets or exceeds the configured
    maximum recording duration.
    """
    return not self.elapsed_time() < self._max_duration

Audio Processor

shh.adapters.audio.processor

save_audio_to_wav(audio_data, sample_rate=SAMPLE_RATE)

Save audio data to a temporary WAV file. This function is not responsible for cleaning up the temporary file.

Parameters:

Name Type Description Default
audio_data NDArray[float32]

The audio data to save.

required
sample_rate int

The sample rate of the audio data.

SAMPLE_RATE

Returns: Path: The path to the saved WAV file. Raises: AudioProcessingError: If there is an error saving the audio file.

Source code in shh/adapters/audio/processor.py
def save_audio_to_wav(
    audio_data: NDArray[np.float32],
    sample_rate: int = SAMPLE_RATE,
) -> Path:
    """
    Save audio data to a temporary WAV file.
    This function is not responsible for cleaning up the temporary file.

    Args:
        audio_data (NDArray[np.float32]): Audio samples, nominally in [-1.0, 1.0].
        sample_rate (int): The sample rate of the audio data.
    Returns:
        Path: The path to the saved WAV file.
    Raises:
        AudioProcessingError: If there is an error saving the audio file.
    """
    try:
        # Clip before scaling: float samples outside [-1, 1] would otherwise
        # wrap around when cast to int16, producing loud artifacts.
        clipped = np.clip(audio_data, -1.0, 1.0)
        audio_int16: NDArray[np.int16] = (clipped * 32767).astype(np.int16)

        # Create a named temporary file; delete=False keeps it on disk so the
        # caller can read it after this function returns.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
            temp_file_path = Path(temp_file.name)

        # Write the audio data to the WAV file
        wavfile.write(temp_file_path, sample_rate, audio_int16)

        return temp_file_path

    except Exception as e:
        raise AudioProcessingError(f"Failed to save audio to WAV file: {e}") from e

Whisper Client

shh.adapters.whisper.client

transcribe_audio(audio_file_path, api_key, model='whisper-1') async

Transcribe audio using OpenAI's Whisper API.

Source code in shh/adapters/whisper/client.py
async def transcribe_audio(
    audio_file_path: Path,
    api_key: str,
    model: str = "whisper-1",
) -> str:
    """
    Transcribe audio using OpenAI's Whisper API.

    Args:
        audio_file_path: Path to the audio file to transcribe.
        api_key: OpenAI API key.
        model: Transcription model name.
    Returns:
        The transcribed text.
    Raises:
        TranscriptionError: If the API request fails for any reason.
    """
    client = AsyncOpenAI(api_key=api_key)
    try:
        with audio_file_path.open("rb") as audio_file:
            audio_transcription = await client.audio.transcriptions.create(
                file=audio_file,
                model=model,
            )
            return audio_transcription.text

    except Exception as e:
        logger.error(f"Transcription failed: {e}")
        raise TranscriptionError("Failed to transcribe audio.") from e
    finally:
        # Close the client so the underlying HTTP connection pool is not
        # leaked across repeated transcriptions.
        await client.close()

LLM Formatter

shh.adapters.llm.formatter

FormattedTranscription

Bases: BaseModel

Structured output from LLM formatting.

Source code in shh/adapters/llm/formatter.py
class FormattedTranscription(BaseModel):
    """Structured output from LLM formatting."""

    # Final transcription text after style formatting (and optional translation).
    text: str = Field(..., description="The formatted transcription text.")

format_transcription(text, style=TranscriptionStyle.NEUTRAL, api_key='', target_language=None) async

Format the transcription text using an AI agent based on the specified style.

Parameters:

Name Type Description Default
text str

Raw transcription text from Whisper

required
style TranscriptionStyle

Formatting style to apply (neutral, casual, business)

NEUTRAL
api_key str

OpenAI API key for LLM calls

''
target_language str | None

Optional language to translate to (e.g., "English", "French", "Spanish")

None

Returns:

Type Description
FormattedTranscription

FormattedTranscription with styled and optionally translated text

Source code in shh/adapters/llm/formatter.py
async def format_transcription(
    text: str,
    style: TranscriptionStyle = TranscriptionStyle.NEUTRAL,
    api_key: str = "",
    target_language: str | None = None,
) -> FormattedTranscription:
    """
    Apply a formatting style (and optional translation) to a raw transcription.

    Args:
        text: Raw transcription text from Whisper
        style: Formatting style to apply (neutral, casual, business)
        api_key: OpenAI API key for LLM calls
        target_language: Optional language to translate to (e.g., "English", "French", "Spanish")

    Returns:
        FormattedTranscription with styled and optionally translated text
    """
    # Neutral style with no translation needs no LLM round-trip.
    if style == TranscriptionStyle.NEUTRAL and not target_language:
        return FormattedTranscription(text=text)

    # Styles without a dedicated prompt (e.g. neutral + translation) fall
    # back to the casual prompt.
    system_prompt = STYLE_PROMPTS.get(style, STYLE_PROMPTS[TranscriptionStyle.CASUAL])

    # Build the user-facing instruction for the agent.
    if target_language:
        user_prompt = f"Format this transcription and translate it to {target_language}: {text}"
    else:
        user_prompt = f"Format this transcription: {text}"

    # OpenAI-backed model wired with the caller-provided API key.
    llm = OpenAIChatModel("gpt-4o-mini", provider=OpenAIProvider(api_key=api_key))

    # PydanticAI agent producing structured FormattedTranscription output.
    formatter_agent: Agent[None, FormattedTranscription] = Agent(
        llm,
        output_type=FormattedTranscription,
        system_prompt=system_prompt,
    )

    try:
        run_result = await formatter_agent.run(user_prompt)
        return run_result.output
    except Exception as e:
        logger.error(f"Formatting failed: {e}")
        raise FormattingError(f"Failed to format transcription: {e}") from e

Clipboard Manager

shh.adapters.clipboard.manager

Clipboard operations for copying transcription results.

copy_to_clipboard(text) async

Copy text to system clipboard.

Parameters:

Name Type Description Default
text str

Text to copy to clipboard

required

Returns:

Type Description
None

None

Source code in shh/adapters/clipboard/manager.py
async def copy_to_clipboard(text: str) -> None:
    """
    Copy text to system clipboard.

    Best-effort: any clipboard failure is swallowed so the caller never
    crashes just because no clipboard backend is available.

    Args:
        text: Text to copy to clipboard

    Returns:
        None
    """
    try:
        pyperclip.copy(text)
    except Exception:
        # Deliberately ignore clipboard errors (headless environments, etc.).
        pass