Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions real-time-voicebot/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
ASSEMBLYAI_API_KEY=your_assemblyai_api_key
OPENAI_API_KEY=your_openai_api_key
ELEVENLABS_API_KEY=your_elevenlabs_api_key
9 changes: 6 additions & 3 deletions real-time-voicebot/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,16 @@ Before running the application, you need API keys for the following services:
- [Get the API key for OpenAI here →](https://platform.openai.com/api-keys)
- [Get the API key for ElevenLabs here →](https://elevenlabs.io/app/sign-in)

Update the API keys in the code by replacing the placeholders in the `AI_Assistant` class.
Copy `.env.example` to `.env` and add your API keys:

```bash
cp .env.example .env
```

## Run the application

```bash
pip install assemblyai openai elevenlabs python-dotenv
python app.py
```
---
Expand All @@ -36,5 +41,3 @@ python app.py

## Contribution
Contributions are welcome! Please fork the repository and submit a pull request with your improvements.


70 changes: 34 additions & 36 deletions real-time-voicebot/app.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,30 @@
import os
from dotenv import load_dotenv
import assemblyai as aai
from elevenlabs import stream
from elevenlabs.client import ElevenLabs
from openai import OpenAI

load_dotenv()

class AI_Assistant:
    """Real-time voice assistant wiring together AssemblyAI (speech-to-text),
    OpenAI (chat completion), and ElevenLabs (text-to-speech)."""

    def __init__(self):
        """Load API keys from the environment and initialize the service clients.

        Expects ASSEMBLYAI_API_KEY, OPENAI_API_KEY, and ELEVENLABS_API_KEY to be
        set (e.g. via a .env file loaded with load_dotenv()).

        Raises:
            ValueError: if any of the three required API keys is missing.
        """
        assemblyai_key = os.getenv("ASSEMBLYAI_API_KEY")
        openai_key = os.getenv("OPENAI_API_KEY")
        elevenlabs_key = os.getenv("ELEVENLABS_API_KEY")

        # Fail fast with a single actionable message rather than erroring
        # later inside one of the SDK calls.
        if not all([assemblyai_key, openai_key, elevenlabs_key]):
            raise ValueError(
                "Missing required API keys. Please set ASSEMBLYAI_API_KEY, "
                "OPENAI_API_KEY, and ELEVENLABS_API_KEY in your .env file."
            )

        aai.settings.api_key = assemblyai_key
        self.openai_client = OpenAI(api_key=openai_key)
        self.elevenlabs_client = ElevenLabs(api_key=elevenlabs_key)
        self.transcriber = None  # created lazily by start_transcription()

        # Running chat history, seeded with the system prompt; user and
        # assistant turns are appended as the conversation proceeds.
        self.interaction = [
            {"role": "system", "content": "You are a helpful travel guide in London, UK, helping a tourist plan their trip. Be conversational and concise in your responses."},
        ]

def stop_transcription(self):
Expand All @@ -24,15 +34,12 @@ def stop_transcription(self):

def on_open(self, session_opened: aai.RealtimeSessionOpened):
    """Log the AssemblyAI realtime session id when the socket opens."""
    print("Session ID:", session_opened.session_id)

def on_error(self, error: aai.RealtimeError):
    """Report a realtime transcription error to the console."""
    # Fixed typo in the original message ("occured" -> "occurred").
    print("An error occurred:", error)

def on_close(self):
    """Log when the realtime transcription session closes."""
    print("Closing Session")

def on_data(self, transcript: aai.RealtimeTranscript):
if not transcript.text:
Expand All @@ -44,52 +51,43 @@ def on_data(self, transcript: aai.RealtimeTranscript):

def start_transcription(self):
    """Open a realtime transcription session and stream microphone audio into it.

    Blocks while streaming; transcripts are delivered via the on_* callbacks.
    """
    self.transcriber = aai.RealtimeTranscriber(
        sample_rate=16000,
        on_data=self.on_data,
        on_error=self.on_error,
        on_open=self.on_open,
        on_close=self.on_close,
        # Treat 1000 ms of silence as the end of an utterance.
        end_utterance_silence_threshold=1000
    )

    self.transcriber.connect()
    # Microphone sample rate must match the transcriber's sample_rate above.
    microphone_stream = aai.extras.MicrophoneStream(sample_rate=16000)
    self.transcriber.stream(microphone_stream)

def generate_ai_response(self, transcript):
    """Send the finished user utterance to OpenAI, speak the reply, then resume listening.

    Args:
        transcript: final realtime transcript object; only its .text is used.
    """
    # Pause the microphone stream so we don't transcribe our own TTS output.
    self.stop_transcription()

    self.interaction.append({"role": "user", "content": transcript.text})
    print(f"\nTourist: {transcript.text}", end="\r\n")

    response = self.openai_client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=self.interaction
    )

    ai_response = response.choices[0].message.content

    # generate_audio() also appends the assistant turn to the history.
    self.generate_audio(ai_response)

    self.start_transcription()
    print("\nReal-time transcription: ", end="\r\n")

def generate_audio(self, text):
    """Record the assistant reply in the chat history and speak it via ElevenLabs.

    Args:
        text: the assistant's reply to be spoken aloud.
    """
    self.interaction.append({"role": "assistant", "content": text})
    print(f"\nAI Guide: {text}")

    audio_stream = self.elevenlabs_client.generate(
        text=text,
        voice="Rachel",
        stream=True
    )

    # Play audio chunks as they arrive instead of waiting for full synthesis.
    stream(audio_stream)


# Script entry point: greet the caller, then start listening on the microphone.
# (The diff artifact duplicating the final start_transcription() call is removed —
# calling it twice would try to reopen the session after the first blocking call.)
greeting = "Thank you for calling London Travel Guide. My name is Rachel, how may I assist you?"
ai_assistant = AI_Assistant()
ai_assistant.generate_audio(greeting)
ai_assistant.start_transcription()