Commit b848fa1b authored by Kim-Celine Kahl

Merge branch '2-design-voice-recognition-service'

parents 8d02d7de 64386a24
Pipeline #2932 passed with stages in 11 minutes and 43 seconds
@@ -48,6 +48,11 @@

You can clone the repository using the following command::

    git clone https://weisslab.cs.ucl.ac.uk/WEISS/SoftwareRepositories/SNAPPY/scikit-surgeryspeech
If you have problems running the application, you might need to install portaudio::

    brew install portaudio
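If pyaudio was installed before portaudio, it may also help to reinstall pyaudio afterwards (for example with ``pip install --force-reinstall pyaudio``), since pyaudio builds against the portaudio headers.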
Running tests
^^^^^^^^^^^^^
@@ -5,4 +5,5 @@
pyaudio
SpeechRecognition
google-api-python-client
oauth2client
PySide2
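The updated dependency list can be installed in the usual way, for example with ``pip install -r requirements.txt``.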
"""
Speech API algorithm
"""

# pylint: disable=no-name-in-module

import os
import logging
import json

import speech_recognition as sr

from PySide2.QtCore import QObject, Signal

LOGGER = logging.getLogger("voice_recognition_logger")


class VoiceRecognitionService(QObject):
    """
    Voice Recognition service which takes a microphone input and converts it
    to text by using the Google Cloud Speech-to-Text API
    """

    next = Signal()
    previous = Signal()
    undo = Signal()
    quit = Signal()
    voice_command = Signal(str)

    def __init__(self):
        """
        Constructor.
        """
        LOGGER.info("Creating Voice Recognition Service")

        # Need this for SignalInstance
        super(VoiceRecognitionService, self).__init__()

        self.stop_listen = None

        # set the environment variable GOOGLE_APPLICATION_CREDENTIALS
        # to the path of your json file with credentials
        key_file_path = os.environ['GOOGLE_APPLICATION_CREDENTIALS']
        with open(key_file_path, 'r') as file:
            self.credentials = file.read()

        # this raises a ValueError if the credential file isn't valid json
        json.loads(self.credentials)

        LOGGER.info("Created Voice Recognition Service")

    def listen(self):
        """
        Method which starts listening in the background
        """
        # Record Audio
        recognizer = sr.Recognizer()
        microphone = sr.Microphone()

        # initialization of the background listening thread
        LOGGER.info("Say something!")
        self.stop_listen = recognizer\
            .listen_in_background(microphone, self.callback)

    def callback(self, recognizer, audio):
        """
        Method which gets called by the background listener

        :param recognizer: recognizer from the Python speech recognition API
        :param audio: audio input (e.g. microphone)
        :return:
        """
        # this is called by the background thread,
        # converting speech into a string
        try:
            # Google Cloud speech-to-text with credentials (json file)
            words = recognizer\
                .recognize_google_cloud(audio,
                                        credentials_json=self.credentials)
            LOGGER.info("You said: %s", words)

            # if the string equals a certain keyword (here "start"),
            # the background thread is stopped and a method
            # is called to listen to one single command
            if words == "start ":
                self.stop_listen(wait_for_stop=False)
                self.listen_to_command()
        except sr.UnknownValueError:
            LOGGER.info("Google Speech Recognition could not understand audio")
        except sr.RequestError as exception:
            LOGGER.info("Could not request results from Google Speech "
                        "Recognition service; %s", exception)

    def listen_to_command(self):
        """
        This method gets called when a specific command is said.
        It then listens for specific commands and converts them to Qt Signals

        :return:
        """
        recognizer = sr.Recognizer()

        # listen to a single command
        with sr.Microphone() as source:
            LOGGER.info("Listening for command")
            audio = recognizer.listen(source)

        try:
            # convert the command to a string; this string should later
            # be used to fire a certain GUI command
            words = recognizer.\
                recognize_google_cloud(audio,
                                       credentials_json=self.credentials)

            # convert the spoken input into a signal:
            # for next, quit, previous and undo there are specific signals;
            # if none of them is said, a generic signal is emitted, containing
            # the string of the spoken input
            if words == "next ":
                self.next.emit()
            elif words == "quit ":
                self.quit.emit()
            elif words == "previous ":
                self.previous.emit()
            elif words == "undo ":
                self.undo.emit()
            else:
                self.voice_command.emit(words)
        except sr.UnknownValueError:
            LOGGER.info("Google Speech Recognition could not understand audio")
        except sr.RequestError as exception:
            LOGGER.info("Could not request results from Google Speech "
                        "Recognition service; %s", exception)

        # call self.listen() again
        # so that the background thread starts listening again
        self.listen()
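For orientation, a minimal usage sketch of the new service (not part of this merge) could look as follows; the credentials path and the on_command handler below are made-up names, and a running Qt event loop is assumed.

# Illustrative sketch only: wiring VoiceRecognitionService into a small script.
# '/path/to/credentials.json' and on_command are placeholder names.
import os
import sys

from PySide2.QtCore import QCoreApplication

from sksurgeryspeech.algorithms import first_test_speech_api as speech_api

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/path/to/credentials.json'

app = QCoreApplication(sys.argv)
service = speech_api.VoiceRecognitionService()


def on_command(text):
    # generic handler for phrases other than next/previous/undo/quit
    print("Recognised command:", text)


service.voice_command.connect(on_command)

# start the background listening thread; say "start" and then a command
service.listen()

sys.exit(app.exec_())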
@@ -5,7 +5,7 @@

import argparse

from sksurgeryspeech import __version__
from sksurgeryspeech.ui import sksurgeryspeech_demo


def main(args=None):

@@ -22,4 +22,5 @@ def main(args=None):

    args = parser.parse_args(args)

    demo = sksurgeryspeech_demo.SpeechRecognitionDemo()
    demo.run_demo()
# coding=utf-8

"""
Demo for the Speech API module
"""

# pylint: disable=c-extension-no-member, no-name-in-module, no-self-use

import sys
import logging
import PySide2.QtCore
from sksurgeryspeech.algorithms import first_test_speech_api as speech_api

LOGGER = logging.getLogger("voice_recognition_logger")


class VoiceListener(PySide2.QtCore.QObject):
    """
    Class which contains the slots for the demo application
    """

    @PySide2.QtCore.Slot()
    def on_next(self):
        """
        Slot for the next signal
        """
        LOGGER.info("Next signal caught")

    @PySide2.QtCore.Slot()
    def on_previous(self):
        """
        Slot for the previous signal
        """
        LOGGER.info("Previous signal caught")

    @PySide2.QtCore.Slot()
    def on_undo(self):
        """
        Slot for the undo signal
        """
        LOGGER.info("Undo signal caught")

    @PySide2.QtCore.Slot()
    def on_quit(self):
        """
        Slot for the quit signal.
        Quits the application
        """
        LOGGER.info("Quit signal caught... Exit application")
        PySide2.QtCore.QCoreApplication.quit()

    @PySide2.QtCore.Slot()
    def on_voice_signal(self, input_string):
        """
        Slot for the generic voice signal,
        which just contains the microphone input as a string
        """
        LOGGER.info("Generic voice signal caught with input: %s", input_string)


class SpeechRecognitionDemo(PySide2.QtCore.QObject):
    """
    Demo class for the Speech API module
    """

    def __init__(self):
        """
        Constructor.
        """
        super(SpeechRecognitionDemo, self).__init__()

        self.voice_recognition = speech_api.VoiceRecognitionService()
        self.listener = VoiceListener()

        # connect the Signals emitted by the VoiceRecognitionService
        # with the Slots of the VoiceListener
        self.voice_recognition.next.connect(self.listener.on_next)
        self.voice_recognition.previous.connect(self.listener.on_previous)
        self.voice_recognition.undo.connect(self.listener.on_undo)
        self.voice_recognition.quit.connect(self.listener.on_quit)
        self.voice_recognition.voice_command\
            .connect(self.listener.on_voice_signal)

    def run_demo(self):
        """
        Entry point to run the demo

        :return:
        """
        # instantiate the QCoreApplication
        app = PySide2.QtCore.QCoreApplication()

        # set up the logger
        voice_recognition_logger = logging.getLogger("voice_recognition_logger")
        voice_recognition_logger.setLevel(logging.INFO)
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.INFO)
        file_handler = logging.FileHandler('voice_recognition_log.log')
        file_handler.setLevel(logging.INFO)
        voice_recognition_logger.addHandler(console_handler)
        voice_recognition_logger.addHandler(file_handler)

        # this is the main call to start the background thread listening,
        # which also later has to be called within the SmartLiver code
        self.voice_recognition.listen()

        # start the application, meaning starting the infinite Event Loop,
        # which stops when the user says "start" followed by "quit"
        return sys.exit(app.exec_())
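Because any phrase other than the four keywords arrives through the generic voice_command signal, a downstream application (such as the SmartLiver code referred to in the comments) could route extra phrases itself. A hedged sketch, with a hypothetical listener class and phrase:

# Sketch only: reacting to an extra phrase delivered via voice_command.
# ExtraCommandListener and the "screenshot" phrase are hypothetical examples.
import PySide2.QtCore


class ExtraCommandListener(PySide2.QtCore.QObject):
    """Example listener reacting to one additional spoken phrase."""

    @PySide2.QtCore.Slot(str)
    def on_voice_signal(self, input_string):
        # the recognised text tends to carry a trailing space, e.g. "start "
        if input_string.strip() == "screenshot":
            print("Would trigger a screenshot here")

Such a listener could then be connected in the same way as the demo listener, for example with demo.voice_recognition.voice_command.connect(extra_listener.on_voice_signal).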
@@ -2,11 +2,9 @@

"""scikit-surgeryspeech tests"""

# Pytest style
from sksurgeryspeech.algorithms import first_test_speech_api as speech_api


def test_function_example():
    assert True
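As a hedged illustration of how the new service could be unit tested without touching the Google Cloud API, a test along the following lines might be added; the test name, the pytest tmp_path/monkeypatch fixtures and the fake credentials content are assumptions, not part of this merge.

# Sketch of a possible test: feed the service a throw-away credentials file
# so the constructor can be exercised offline. Names here are illustrative.
import json


def test_service_reads_credentials(tmp_path, monkeypatch):
    key_file = tmp_path / "credentials.json"
    key_file.write_text(json.dumps({"type": "service_account"}))
    monkeypatch.setenv('GOOGLE_APPLICATION_CREDENTIALS', str(key_file))

    service = speech_api.VoiceRecognitionService()

    # the constructor should have read and validated the json content
    assert json.loads(service.credentials)["type"] == "service_account"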
# content of: tox.ini, put in same dir as setup.py
[tox]
envlist = py36,lint
skipsdist = True
[travis]