Start Listening – System.Speech.Recognition

Having tested the speech synthesis part of SAPI, we now look at a small example of speech recognition (SR). The goal of this little exploration is to listen to what a user says (in English) and to ignore everything except the word “Quit” (or any utterance that sounds close enough to be recognized as “Quit”).

This goal is to be accomplished in 3 steps:

  1. Initialising the SR service with a user-selected recognizer (language), e.g. US or UK English
  2. Starting the SR service
  3. Repeatedly polling for user input (waiting up to a timeout on each attempt) on the default audio input device (microphone, etc.)

The following (commented) code follows these steps and implements the goal in a very simple way.

    using System;
    using System.Collections.Generic;
    using System.Speech;
    using System.Speech.Recognition;

    namespace Recognizer
    {
        /// <summary>
        /// Console sample: lists the installed speech recognizers, lets the user pick one,
        /// then runs dictation on the default audio device until "quit" is recognized.
        /// </summary>
        class Program
        {
            /// <summary>
            /// Initial-silence timeout handed to every blocking Recognize call.
            /// </summary>
            private static readonly TimeSpan RecognitionTimeout = new TimeSpan(0, 0, 30);

            /// <summary>
            /// Entry point: selects a recognizer, starts it and polls for speech.
            /// </summary>
            /// <param name="args">Command line arguments (unused).</param>
            static void Main(string[] args)
            {
                Console.WriteLine("Speech recognition example");

                System.Collections.ObjectModel.ReadOnlyCollection<RecognizerInfo> recognizedSpeeches =
                    SpeechRecognitionEngine.InstalledRecognizers();

                /*
                 * an empty list is as unusable as a missing one: indexing it would throw
                 */
                if (recognizedSpeeches != null && recognizedSpeeches.Count > 0)
                {
                    /*
                     * present the installed SRs together with the number the user has to type
                     */
                    Console.WriteLine("Recognized Speeches:");
                    for (int index = 0; index < recognizedSpeeches.Count; index++)
                    {
                        Console.WriteLine("\t[+]{0}: {1} - {2}",
                            index,
                            recognizedSpeeches[index].Name,
                            recognizedSpeeches[index].Description);
                    }

                    /*
                     * let the user choose a localised SR;
                     * fall back to the first one when the input is not a valid index
                     */
                    Console.WriteLine("Which SR do you want to use?:");
                    int recognizerNumber = ParseRecognizerNumber(Console.ReadLine(), recognizedSpeeches.Count);

                    if (recognizerNumber >= 0)
                    {
                        Console.WriteLine("You choose: " + recognizedSpeeches[recognizerNumber].Name);
                    }
                    else
                    {
                        recognizerNumber = 0;
                        Console.WriteLine("Invalid choice, using: " + recognizedSpeeches[recognizerNumber].Name);
                    }

                    /*
                     * the engine is IDisposable, so release it deterministically
                     */
                    using (SpeechRecognitionEngine engine =
                        new SpeechRecognitionEngine(recognizedSpeeches[recognizerNumber]))
                    {
                        /*
                         * use the default audio device (configure to be appropriate mic, etc.)
                         * hook up the event handler which notifies us as soon as speech is detected
                         * (detection only - the recognized text comes from Recognize below)
                         */
                        engine.SetInputToDefaultAudioDevice();
                        engine.SpeechDetected += new EventHandler<SpeechDetectedEventArgs>(engine_SpeechDetected);
                        engine.LoadGrammar(new DictationGrammar());

                        Console.WriteLine("Please press <any key> and then say something ... ");
                        Console.ReadLine();

                        /*
                         * poll user input
                         *
                         * wait up to 30 seconds for some input
                         * quit if user has said something like quit
                         */
                        while (true)
                        {
                            RecognitionResult srResult = engine.Recognize(RecognitionTimeout);

                            /*
                             * Recognize returns null when nothing was recognized
                             * (e.g. the timeout elapsed), so there is nothing to print
                             */
                            if (srResult == null)
                            {
                                Console.WriteLine("Nothing recognized, listening again ...");
                                continue;
                            }

                            Console.WriteLine("Recognized: {0}\r\nConfidence: {1}",
                                srResult.Text,
                                srResult.Confidence.ToString());

                            /*
                             * ordinal, case-insensitive compare: ToLower() depends on the current culture
                             */
                            if (string.Equals(srResult.Text, "quit", StringComparison.OrdinalIgnoreCase))
                            {
                                Console.WriteLine("Quit ...");
                                break;
                            }
                        }
                    }
                }
                else
                {
                    Console.WriteLine("No speech recognizers are installed.");
                }

                /*
                 * the end
                 */
                Console.WriteLine("Press <any key> to exit!");
                Console.ReadLine();
            }

            /// <summary>
            /// Converts the text typed by the user into a recognizer index.
            /// </summary>
            /// <param name="input">Raw console input; may be null or non-numeric.</param>
            /// <param name="recognizerCount">Number of installed recognizers.</param>
            /// <returns>
            /// The parsed index when it lies in [0, recognizerCount), otherwise -1.
            /// </returns>
            private static int ParseRecognizerNumber(string input, int recognizerCount)
            {
                int number;
                if (Int32.TryParse(input, out number) && number >= 0 && number < recognizerCount)
                {
                    return number;
                }

                return -1;
            }

            /// <summary>
            /// Reports on the console that the engine has detected speech on the input device.
            /// </summary>
            /// <param name="sender">The engine raising the event.</param>
            /// <param name="e">Event data for the detected speech (unused).</param>
            static void engine_SpeechDetected(object sender, SpeechDetectedEventArgs e)
            {
                Console.WriteLine("Detected Speech ...");
            }
        }
    }