
    /UiD                    h   d dl mZ d dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
 ddlmZmZ ddlmZmZ dd	lmZ d
ZdZe
d   Z	 e G d d             Z G d dej.                        Z G d dej.                        Ze G d d             Z G d dej.                        Z G d dej.                        Zy)    )annotationsN)AsyncIterator)	dataclass)AnyCallableLiteral   )npnpt)
AudioInputStreamedAudioInput)get_sentence_based_splitterzYYou will receive partial sentences. Do not complete the sentence, just read out the text.x   )	alloyashcoralechofableonyxnovasageshimmerc                      e Zd ZU dZdZded<   	 dZded<   	 ej                  Z	ded	<   	 dZ
d
ed<   	 dZded<   	  e       Zded<   	 dZded<   y)TTSModelSettingszSettings for a TTS model.NzTTSVoice | Nonevoicer   intbuffer_sizeznpt.DTypeLikedtypezYCallable[[npt.NDArray[np.int16 | np.float32]], npt.NDArray[np.int16 | np.float32]] | Nonetransform_datazXYou will receive partial sentences. Do not complete the sentence just read out the text.strinstructionsz Callable[[str], tuple[str, str]]text_splitterfloat | Nonespeed)__name__
__module____qualname____doc__r   __annotations__r   r
   int16r   r   r!   r   r"   r$        n/home/jay/workspace/tools/ai-image-gen/jaaz-app/server/venv/lib/python3.12/site-packages/agents/voice/model.pyr   r      s    #!E?!
 KS88E=#= 	 a 	c # 
 7R6SM3S E<Vr,   r   c                  f    e Zd ZdZeej                  dd              Zej                  dd       Zy)TTSModelz?A text-to-speech model that can convert text into audio output.c                     y)zThe name of the TTS model.Nr+   selfs    r-   
model_namezTTSModel.model_nameC        	r,   c                     y)zGiven a text string, produces a stream of audio bytes, in PCM format.

        Args:
            text: The text to convert to audio.

        Returns:
            An async iterator of audio bytes, in PCM format.
        Nr+   )r2   textsettingss      r-   runzTTSModel.runI        	r,   Nreturnr    )r6   r    r7   r   r;   zAsyncIterator[bytes])	r%   r&   r'   r(   propertyabcabstractmethodr3   r8   r+   r,   r-   r/   r/   @   s?    I   		 	r,   r/   c                  \    e Zd ZdZej
                  dd       Zej
                  dd       Zy)StreamedTranscriptionSessionz(A streamed transcription of audio input.c                     y)zYields a stream of text transcriptions. Each transcription is a turn in the conversation.

        This method is expected to return only after `close()` is called.
        Nr+   r1   s    r-   transcribe_turnsz-StreamedTranscriptionSession.transcribe_turnsY   s     	r,   c                   K   yw)zCloses the session.Nr+   r1   s    r-   closez"StreamedTranscriptionSession.closea   s      	   N)r;   zAsyncIterator[str])r;   None)r%   r&   r'   r(   r=   r>   rB   rD   r+   r,   r-   r@   r@   V   s7    2  	 r,   r@   c                  P    e Zd ZU dZdZded<   	 dZded<   	 dZded<   	 dZded	<   y)
STTModelSettingsz$Settings for a speech-to-text model.N
str | Nonepromptlanguager#   temperaturezdict[str, Any] | Noneturn_detection)	r%   r&   r'   r(   rJ   r)   rK   rL   rM   r+   r,   r-   rH   rH   g   s:    .FJ/Hj* $K$',0N)0Tr,   rH   c                      e Zd ZdZeej                  dd              Zej                  	 	 	 	 	 	 	 	 	 	 dd       Zej                  	 	 	 	 	 	 	 	 	 	 dd       Z	y)	STTModelz>A speech-to-text model that can convert audio input into text.c                     y)zThe name of the STT model.Nr+   r1   s    r-   r3   zSTTModel.model_name{   r4   r,   c                   K   yw)a  Given an audio input, produces a text transcription.

        Args:
            input: The audio input to transcribe.
            settings: The settings to use for the transcription.
            trace_include_sensitive_data: Whether to include sensitive data in traces.
            trace_include_sensitive_audio_data: Whether to include sensitive audio data in traces.

        Returns:
            The text transcription of the audio input.
        Nr+   r2   inputr7   trace_include_sensitive_data"trace_include_sensitive_audio_datas        r-   
transcribezSTTModel.transcribe   s     & 	rE   c                   K   yw)a  Creates a new transcription session, which you can push audio to, and receive a stream
        of text transcriptions.

        Args:
            input: The audio input to transcribe.
            settings: The settings to use for the transcription.
            trace_include_sensitive_data: Whether to include sensitive data in traces.
            trace_include_sensitive_audio_data: Whether to include sensitive audio data in traces.

        Returns:
            A new transcription session.
        Nr+   rR   s        r-   create_sessionzSTTModel.create_session   s     ( 	rE   Nr:   )
rS   r   r7   rH   rT   boolrU   rY   r;   r    )
rS   r   r7   rH   rT   rY   rU   rY   r;   r@   )
r%   r&   r'   r(   r<   r=   r>   r3   rV   rX   r+   r,   r-   rO   rO   x   s    H   	 # '+	
 -1 
 ( 	! # '+	
 -1 
& r,   rO   c                  \    e Zd ZdZej
                  dd       Zej
                  dd       Zy)VoiceModelProviderzThe base interface for a voice model provider.

    A model provider is responsible for creating speech-to-text and text-to-speech models, given a
    name.
    c                     y)zGet a speech-to-text model by name.

        Args:
            model_name: The name of the model to get.

        Returns:
            The speech-to-text model.
        Nr+   r2   r3   s     r-   get_stt_modelz VoiceModelProvider.get_stt_model   r9   r,   c                     y)z#Get a text-to-speech model by name.Nr+   r]   s     r-   get_tts_modelz VoiceModelProvider.get_tts_model   s    r,   N)r3   rI   r;   rO   )r3   rI   r;   r/   )r%   r&   r'   r(   r=   r>   r^   r`   r+   r,   r-   r[   r[      s;     		 	 	2 2r,   r[   )
__future__r   r=   collections.abcr   dataclassesr   typingr   r   r   importsr
   r   rS   r   r   utilsr   DEFAULT_TTS_INSTRUCTIONSDEFAULT_TTS_BUFFER_SIZETTSVoicer   ABCr/   r@   rH   rO   r[   r+   r,   r-   <module>rk      s    " 
 ) ! ) )  1 . `   ^_ 9 'W 'W 'WTsww ,377 " U U U 2sww 2j2 2r,   