""" gemini_tts_teste.py Author: Descomplicar® Crescimento Digital Link: https://descomplicar.pt Copyright: 2025 Descomplicar® """ # To run this code you need to install the following dependencies: # pip install google-genai import base64 import mimetypes import os import re import struct from google import genai from google.genai import types def save_binary_file(file_name, data): f = open(file_name, "wb") f.write(data) f.close() print(f"File saved to to: {file_name}") def generate_test(): client = genai.Client( api_key=os.environ.get("GEMINI_API_KEY"), ) model = "gemini-2.5-pro-preview-tts" contents = [ types.Content( role="user", parts=[ types.Part.from_text(text="""Bem-vindo à Descomplicar, a agência de aceleração digital que transforma a sua presença online numa máquina de crescimento. Somos especialistas em Marketing Digital, criação de websites profissionais e estratégias que geram resultados reais para o seu negócio. Na Descomplicar, a nossa filosofia é simples: tornar o complexo mais simples. Descomplicamos o marketing digital para que você se possa focar no que faz melhor - gerir o seu negócio. Marque uma reunião connosco e descubra como podemos acelerar o crescimento digital da sua empresa."""), ], ), ] generate_content_config = types.GenerateContentConfig( temperature=1.1, response_modalities=[ "audio", ], speech_config=types.SpeechConfig( voice_config=types.VoiceConfig( prebuilt_voice_config=types.PrebuiltVoiceConfig( voice_name="Autonoe" ) ) ), ) file_index = 0 for chunk in client.models.generate_content_stream( model=model, contents=contents, config=generate_content_config, ): if ( chunk.candidates is None or chunk.candidates[0].content is None or chunk.candidates[0].content.parts is None ): continue if chunk.candidates[0].content.parts[0].inline_data and chunk.candidates[0].content.parts[0].inline_data.data: file_name = f"descomplicar_teste_audio_{file_index}" file_index += 1 inline_data = chunk.candidates[0].content.parts[0].inline_data data_buffer = inline_data.data file_extension = mimetypes.guess_extension(inline_data.mime_type) if file_extension is None: file_extension = ".wav" data_buffer = convert_to_wav(inline_data.data, inline_data.mime_type) save_binary_file(f"{file_name}{file_extension}", data_buffer) else: print(chunk.text) def convert_to_wav(audio_data: bytes, mime_type: str) -> bytes: """Generates a WAV file header for the given audio data and parameters.""" parameters = parse_audio_mime_type(mime_type) bits_per_sample = parameters["bits_per_sample"] sample_rate = parameters["rate"] num_channels = 1 data_size = len(audio_data) bytes_per_sample = bits_per_sample // 8 block_align = num_channels * bytes_per_sample byte_rate = sample_rate * block_align chunk_size = 36 + data_size header = struct.pack( "<4sI4s4sIHHIIHH4sI", b"RIFF", # ChunkID chunk_size, # ChunkSize (total file size - 8 bytes) b"WAVE", # Format b"fmt ", # Subchunk1ID 16, # Subchunk1Size (16 for PCM) 1, # AudioFormat (1 for PCM) num_channels, # NumChannels sample_rate, # SampleRate byte_rate, # ByteRate block_align, # BlockAlign bits_per_sample, # BitsPerSample b"data", # Subchunk2ID data_size # Subchunk2Size (size of audio data) ) return header + audio_data def parse_audio_mime_type(mime_type: str) -> dict[str, int | None]: """Parses bits per sample and rate from an audio MIME type string.""" bits_per_sample = 16 rate = 24000 parts = mime_type.split(";") for param in parts: param = param.strip() if param.lower().startswith("rate="): try: rate_str = param.split("=", 1)[1] rate = int(rate_str) except (ValueError, IndexError): pass elif param.startswith("audio/L"): try: bits_per_sample = int(param.split("L", 1)[1]) except (ValueError, IndexError): pass return {"bits_per_sample": bits_per_sample, "rate": rate} if __name__ == "__main__": generate_test()