Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions deepgram/clients/agent/v1/websocket/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,9 +271,7 @@ class Agent(BaseResponse):
greeting: Optional[str] = field(
default=None, metadata=dataclass_config(exclude=lambda f: f is None)
)
tags: Optional[List[str]] = field(
default=None, metadata=dataclass_config(exclude=lambda f: f is None)
)


def __post_init__(self):
"""Handle conversion of dict/list data to proper Speak objects"""
Expand Down Expand Up @@ -350,6 +348,9 @@ class SettingsOptions(BaseResponse):

experimental: Optional[bool] = field(default=False)
type: str = str(AgentWebSocketEvents.Settings)
tags: Optional[List[str]] = field(
default=None, metadata=dataclass_config(exclude=lambda f: f is None)
)
audio: Audio = field(default_factory=Audio)
agent: Agent = field(default_factory=Agent)
mip_opt_out: Optional[bool] = field(
Expand Down
137 changes: 137 additions & 0 deletions examples/agent/tags/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
# SPDX-License-Identifier: MIT
from signal import SIGINT, SIGTERM
import asyncio
import time
from deepgram.utils import verboselogs
from deepgram import (
DeepgramClient,
DeepgramClientOptions,
AgentWebSocketEvents,
SettingsOptions,
)
TTS_TEXT = "Hello, this is a text to speech example using Deepgram."

# One-shot flag: the audio-data handler prints its explanatory notice only
# while this is True and then sets it to False. A `global` statement is not
# needed (and has no effect) at module scope, so the name is simply assigned.
warning_notice = True
async def main():
    """Run the agent websocket example with request tags attached.

    Builds a Deepgram client, registers a printing handler for every agent
    websocket event, configures ``SettingsOptions`` (including ``tags``),
    starts the connection and then idles until the task is cancelled by
    ``shutdown``. The connection is always finished before returning.
    ``ValueError`` and any other ``Exception`` are reported on stdout
    rather than propagated.
    """
    try:
        # We are inside a coroutine, so ask for the loop that is running us.
        loop = asyncio.get_running_loop()

        # example of setting up a client config. logging values: WARNING, VERBOSE, DEBUG, SPAM
        config: DeepgramClientOptions = DeepgramClientOptions(
            options={
                "keepalive": "true",
                "microphone_record": "true",
                "speaker_playback": "true",
            },
            # verbose=verboselogs.DEBUG,
        )

        # Initialize Deepgram client - API key should be set in DEEPGRAM_API_KEY environment variable
        # For production testing, make sure your API key has proper permissions
        deepgram: DeepgramClient = DeepgramClient("", config)
        print("Initialized Deepgram client for production API testing")

        # Create a websocket connection to Deepgram
        dg_connection = deepgram.agent.asyncwebsocket.v("1")

        # Register the signal handlers only once the connection object exists,
        # so the callback can never see an unbound `dg_connection`. The signal
        # is bound as a default argument: a plain closure would late-bind the
        # loop variable and every handler would report the last signal.
        for sig in (SIGTERM, SIGINT):
            loop.add_signal_handler(
                sig,
                lambda sig=sig: asyncio.create_task(
                    shutdown(sig, loop, dg_connection)
                ),
            )

        # Handler parameter names are kept exactly as in the original example.
        # NOTE(review): the SDK presumably passes the payload by keyword, so
        # renaming them could break dispatch - confirm before changing.
        async def on_open(self, open, **kwargs):
            print(f"\n\n{open}\n\n")

        async def on_binary_data(self, data, **kwargs):
            # Print the explanatory notice for the first audio chunk only.
            global warning_notice
            if warning_notice:
                print("Received binary data")
                print("You can do something with the binary data here")
                print("OR")
                print(
                    "If you want to simply play the audio, set speaker_playback to true in the options for DeepgramClientOptions"
                )
                warning_notice = False

        async def on_welcome(self, welcome, **kwargs):
            print(f"\n\n{welcome}\n\n")

        async def on_settings_applied(self, settings_applied, **kwargs):
            print(f"\n\n{settings_applied}\n\n")

        async def on_conversation_text(self, conversation_text, **kwargs):
            print(f"\n\n{conversation_text}\n\n")

        async def on_user_started_speaking(self, user_started_speaking, **kwargs):
            print(f"\n\n{user_started_speaking}\n\n")

        async def on_agent_thinking(self, agent_thinking, **kwargs):
            print(f"\n\n{agent_thinking}\n\n")

        async def on_agent_started_speaking(self, agent_started_speaking, **kwargs):
            print(f"\n\n{agent_started_speaking}\n\n")

        async def on_agent_audio_done(self, agent_audio_done, **kwargs):
            print(f"\n\n{agent_audio_done}\n\n")

        async def on_close(self, close, **kwargs):
            print(f"\n\n{close}\n\n")

        async def on_error(self, error, **kwargs):
            print(f"\n\n{error}\n\n")

        async def on_unhandled(self, unhandled, **kwargs):
            print(f"\n\n{unhandled}\n\n")

        dg_connection.on(AgentWebSocketEvents.Open, on_open)
        dg_connection.on(AgentWebSocketEvents.AudioData, on_binary_data)
        dg_connection.on(AgentWebSocketEvents.Welcome, on_welcome)
        dg_connection.on(AgentWebSocketEvents.SettingsApplied, on_settings_applied)
        dg_connection.on(AgentWebSocketEvents.ConversationText, on_conversation_text)
        dg_connection.on(
            AgentWebSocketEvents.UserStartedSpeaking, on_user_started_speaking
        )
        dg_connection.on(AgentWebSocketEvents.AgentThinking, on_agent_thinking)
        dg_connection.on(
            AgentWebSocketEvents.AgentStartedSpeaking, on_agent_started_speaking
        )
        dg_connection.on(AgentWebSocketEvents.AgentAudioDone, on_agent_audio_done)
        dg_connection.on(AgentWebSocketEvents.Close, on_close)
        dg_connection.on(AgentWebSocketEvents.Error, on_error)
        dg_connection.on(AgentWebSocketEvents.Unhandled, on_unhandled)

        # connect to websocket
        options = SettingsOptions()
        options.agent.think.provider.type = "open_ai"
        options.agent.think.provider.model = "gpt-4o-mini"
        options.agent.think.prompt = "You are a helpful AI assistant."
        # `greeting` is declared on the Agent settings object; assigning it on
        # the top-level options would only create an ad-hoc attribute.
        options.agent.greeting = (
            "Hello, this is a text to speech example using Deepgram."
        )
        options.agent.listen.provider.keyterms = ["hello", "goodbye"]
        options.agent.listen.provider.model = "nova-3"
        options.agent.listen.provider.type = "deepgram"
        options.agent.speak.provider.type = "deepgram"
        options.agent.speak.provider.model = "aura-2-thalia-en"
        options.agent.language = "en"

        # Add tags for production testing (tags live on the top-level settings)
        options.tags = ["production-test", "sdk-example", "agent-websocket", "tags-validation"]
        print(f"Using tags: {options.tags}")

        # Print the full options being sent
        print("Options being sent to API:")
        print(options.to_json())

        if await dg_connection.start(options) is False:
            print("Failed to start connection")
            return

        # Only claim success once start() has actually succeeded.
        print("\n\n✅ Connection established with tags!")
        print(f"✅ Tags being used: {options.tags}")
        print("\n🎤 You can now speak into your microphone...")
        print("The agent will respond using the production API with tags.")
        print("Press Ctrl+C to stop.\n\n")

        # wait until cancelled
        try:
            while True:
                await asyncio.sleep(1)
        except asyncio.CancelledError:
            # This block will be executed when the shutdown coroutine cancels all tasks
            pass
        finally:
            await dg_connection.finish()

        print("Finished")
    except ValueError as e:
        print(f"Invalid value encountered: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
async def shutdown(signal, loop, dg_connection):
    """Finish the connection and cancel all other tasks after an exit signal.

    Args:
        signal: The received signal; only its ``name`` attribute is read.
        loop: Event loop to stop once the outstanding tasks have settled.
        dg_connection: Connection object whose ``finish()`` coroutine is
            awaited before the tasks are cancelled.
    """
    print(f"Received exit signal {signal.name}...")
    await dg_connection.finish()

    # Everything except the task running this coroutine gets cancelled.
    current = asyncio.current_task()
    tasks = [t for t in asyncio.all_tasks() if t is not current]
    for task in tasks:
        task.cancel()
    print(f"Cancelling {len(tasks)} outstanding tasks")

    # return_exceptions=True keeps the resulting CancelledErrors from
    # propagating out of gather().
    await asyncio.gather(*tasks, return_exceptions=True)
    loop.stop()
    print("Shutdown complete.")
# Only start the example when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())
8 changes: 4 additions & 4 deletions tests/daily_test/test_daily_agent_websocket.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,10 +433,10 @@ def on_unhandled(self, unhandled, **kwargs):

# Handle special agent tags test case by adding tags to the config
agent_config = test_case["agent_config"].copy()
if test_case.get("test_agent_tags", False):
agent_config["tags"] = ["test", "daily"]

settings.agent = agent_config

if test_case.get("test_agent_tags", False):
settings.tags = ["test", "daily"]
settings.experimental = True # Enable experimental features

print(f"🔧 Starting connection with settings: {settings.to_dict()}")
Expand Down Expand Up @@ -568,7 +568,7 @@ def on_unhandled(self, unhandled, **kwargs):
expected_tags = ["test", "daily"]
# Verify settings contain the expected tags
settings_dict = settings.to_dict()
agent_tags = settings_dict.get("agent", {}).get("tags", [])
agent_tags = settings_dict.get("tags", [])
assert agent_tags == expected_tags, f"Test ID: {unique} - Agent tags should match expected tags"
print(f"✓ Agent tags validated: {agent_tags}")

Expand Down
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
[
{
"type": "Welcome",
"timestamp": 1753228536.7372491,
"timestamp": 1754089254.059805,
"data": {
"type": "Welcome",
"request_id": "f86f006a-1dc6-484e-b040-3825bedf93ba"
"request_id": "60cc0bbe-be55-4c34-b0c6-e9c138885967"
}
},
{
"type": "Open",
"timestamp": 1753228536.737364,
"timestamp": 1754089254.060123,
"data": {
"type": "Open"
}
},
{
"type": "SettingsApplied",
"timestamp": 1753228536.7819788,
"timestamp": 1754089254.1029801,
"data": {
"type": "SettingsApplied"
}
},
{
"type": "ConversationText",
"timestamp": 1753228537.787007,
"timestamp": 1754089255.110622,
"data": {
"type": "ConversationText",
"role": "user",
Expand All @@ -32,23 +32,23 @@
},
{
"type": "Unhandled",
"timestamp": 1753228537.787831,
"timestamp": 1754089255.1114728,
"data": {
"type": "Unhandled",
"raw": "{\"type\":\"History\",\"role\":\"user\",\"content\":\"Hello, this is a test of agent tags functionality.\"}"
}
},
{
"type": "Unhandled",
"timestamp": 1753228537.7884219,
"timestamp": 1754089255.111763,
"data": {
"type": "Unhandled",
"raw": "{\"type\":\"EndOfThought\"}"
}
},
{
"type": "ConversationText",
"timestamp": 1753228538.68838,
"timestamp": 1754089256.122815,
"data": {
"type": "ConversationText",
"role": "assistant",
Expand All @@ -57,24 +57,24 @@
},
{
"type": "Unhandled",
"timestamp": 1753228538.689159,
"timestamp": 1754089256.12335,
"data": {
"type": "Unhandled",
"raw": "{\"type\":\"History\",\"role\":\"assistant\",\"content\":\"Hello!\"}"
}
},
{
"type": "AgentStartedSpeaking",
"timestamp": 1753228538.7265012,
"timestamp": 1754089256.12362,
"data": {
"total_latency": 0.903870874,
"tts_latency": 0.314808536,
"ttt_latency": 0.589062181
"total_latency": 0.962977896,
"tts_latency": 0.368340208,
"ttt_latency": 0.594637578
}
},
{
"type": "ConversationText",
"timestamp": 1753228539.291852,
"timestamp": 1754089256.6148539,
"data": {
"type": "ConversationText",
"role": "user",
Expand All @@ -83,73 +83,73 @@
},
{
"type": "Unhandled",
"timestamp": 1753228539.292917,
"timestamp": 1754089256.615833,
"data": {
"type": "Unhandled",
"raw": "{\"type\":\"History\",\"role\":\"user\",\"content\":\"Can you confirm you are working with tags enabled?\"}"
}
},
{
"type": "Unhandled",
"timestamp": 1753228539.2931762,
"timestamp": 1754089256.616431,
"data": {
"type": "Unhandled",
"raw": "{\"type\":\"EndOfThought\"}"
}
},
{
"type": "AgentAudioDone",
"timestamp": 1753228539.2934241,
"timestamp": 1754089256.616906,
"data": {
"type": "AgentAudioDone"
}
},
{
"type": "ConversationText",
"timestamp": 1753228540.502542,
"timestamp": 1754089257.768304,
"data": {
"type": "ConversationText",
"role": "assistant",
"content": "Yes, I can confirm that tag functionality is enabled."
"content": "Yes, I can confirm that I am able to work with tags."
}
},
{
"type": "Unhandled",
"timestamp": 1753228540.5037608,
"timestamp": 1754089257.768838,
"data": {
"type": "Unhandled",
"raw": "{\"type\":\"History\",\"role\":\"assistant\",\"content\":\"Yes, I can confirm that tag functionality is enabled.\"}"
"raw": "{\"type\":\"History\",\"role\":\"assistant\",\"content\":\"Yes, I can confirm that I am able to work with tags.\"}"
}
},
{
"type": "AgentStartedSpeaking",
"timestamp": 1753228540.5045602,
"timestamp": 1754089257.7692642,
"data": {
"total_latency": 1.146776066,
"tts_latency": 0.378390996,
"ttt_latency": 0.76838492
"total_latency": 1.157360975,
"tts_latency": 0.385327765,
"ttt_latency": 0.7720331
}
},
{
"type": "ConversationText",
"timestamp": 1753228543.8797429,
"timestamp": 1754089261.3335302,
"data": {
"type": "ConversationText",
"role": "assistant",
"content": "How can I assist you with it?"
"content": "How can I assist you with them?"
}
},
{
"type": "Unhandled",
"timestamp": 1753228543.881195,
"timestamp": 1754089261.334396,
"data": {
"type": "Unhandled",
"raw": "{\"type\":\"History\",\"role\":\"assistant\",\"content\":\"How can I assist you with it?\"}"
"raw": "{\"type\":\"History\",\"role\":\"assistant\",\"content\":\"How can I assist you with them?\"}"
}
},
{
"type": "AgentAudioDone",
"timestamp": 1753228543.9538682,
"timestamp": 1754089261.371368,
"data": {
"type": "AgentAudioDone"
}
Expand Down
Loading