git.maemo.org Git - navit-package/blob - navit/speech/espeak/speak.c

   1 /**
   2  * Navit, a modular navigation system.
   3  * Copyright (C) 2005-2008 Navit Team
   4  *
   5  * This program is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU General Public License
   7  * version 2 as published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write to the
  16  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  17  * Boston, MA  02110-1301, USA.
  18  */
  19
  20 #define _WIN32_WINNT 0x0500
  21
  22 #include "config.h"
  23
  24 #ifdef HAVE_API_WIN32_BASE
  25 #include <windows.h>
  26 #include <mmsystem.h>
  27 #include <winreg.h>
  28 #else
  29 #include <unistd.h>
  30 #endif
  31
  32 #include <sys/stat.h>
  33 #include <glib.h>
  34 #include "item.h"
  35 #include "plugin.h"
  36 #include "speech.h"
  37 #include "util.h"
  38 #include "file.h"
  39 #include "debug.h"
  40
  41 #include "support/espeak/speech.h"
  42 #include "support/espeak/speak_lib.h"
  43 #include "support/espeak/phoneme.h"
  44 #include "support/espeak/synthesize.h"
  45 #include "support/espeak/voice.h"
  46 #include "support/espeak/translate.h"
  47
  48
  49 #define SAMPLES_PER_BUFFER 1024
  50 #define BUFFERS 8
  51
  52
  53 // ----- some stuff needed by espeak ----------------------------------
  54 char path_home[N_PATH_HOME];    // this is the espeak-data directory
  55 int (* uri_callback)(int, const char *, const char *) = NULL;
  56 int (* phoneme_callback)(const char *) = NULL;
  57 FILE *f_wave = NULL;
  58
  #ifndef S_ISDIR
  #define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR)
  #endif

  /**
   * Return the size of a file as reported by stat().
   *
   * @param filename path to inspect
   * @return the file size in bytes, 0 when stat() fails, or -2 when the
   *         path names a directory
   */
  int GetFileLength(const char *filename)
  {
          struct stat info;
          int length = 0;

          if (stat(filename, &info) == 0)
          {
                  // -2 is the marker value for "this is a directory"
                  length = S_ISDIR(info.st_mode) ? -2 : (int)info.st_size;
          }

          return length;
  }
  71
  /**
   * Marker hook, part of the symbols this file provides for the bundled
   * espeak code. This back end does nothing with markers, so every
   * argument is ignored.
   */
  void MarkerEvent(int type, unsigned int char_position, int value, unsigned char *out_ptr)
  {
          (void)type;
          (void)char_position;
          (void)value;
          (void)out_ptr;
  }
  75
  /**
   * Allocation hook for the bundled espeak code, backed by g_malloc().
   *
   * @param size number of bytes requested
   * @return the block returned by g_malloc(); release it with Free()
   */
  char *Alloc(int size)
  {
          char *block = g_malloc(size);

          return block;
  }
  80
  /**
   * Release hook for the bundled espeak code; counterpart of Alloc().
   *
   * @param ptr block to hand to g_free()
   */
  void Free(void *ptr)
  {
          g_free(ptr);
  }
  85
  86 // --------------------------------------------------------------------
  87
  88
  89 enum speech_messages
  90 {
  91         msg_say = WM_USER,
  92         msg_exit
  93 };
  94
  /* Progress of the phrase currently being spoken. */
  enum speech_state
  {
          /* idle: nothing is being spoken */
          state_available        = 0,
          /* set by start_speaking(): a phrase has been handed to espeak */
          state_speaking_phase_1 = 1,
          /* set by fill_buffer() once SpeakNextClause() reports no further clause */
          state_speaking_phase_2 = 2,
          /* set by fill_buffer() once wave_out() reports completion while in phase 2 */
          state_speaking_phase_3 = 3
  };
 103
 104 struct speech_priv {
 105         GList *free_buffers;
 106         HWAVEOUT h_wave_out;
 107         enum speech_state state;
 108         GList *phrases;
 109         HWND h_queue;
 110         HANDLE h_message_thread;
 111 };
 112
 113
 114 static void waveout_close(struct speech_priv* sp_priv)
 115 {
 116         waveOutClose(sp_priv->h_wave_out);
 117 }
 118
 119 static BOOL waveout_open(struct speech_priv* sp_priv)
 120 {
 121         MMRESULT result = 0;
 122
 123         HWAVEOUT hwo;
 124         static WAVEFORMATEX wmTemp;
 125         wmTemp.wFormatTag = WAVE_FORMAT_PCM;
 126         wmTemp.nChannels = 1;
 127         wmTemp.nSamplesPerSec = 22050;
 128         wmTemp.wBitsPerSample = 16;
 129         wmTemp.nBlockAlign = wmTemp.nChannels * wmTemp.wBitsPerSample / 8;
 130         wmTemp.nAvgBytesPerSec = wmTemp.nSamplesPerSec * wmTemp.nBlockAlign;
 131         wmTemp.cbSize = 0;
 132         result = waveOutOpen(&hwo, (UINT) WAVE_MAPPER, &wmTemp, (DWORD)sp_priv->h_queue, (DWORD)sp_priv, CALLBACK_WINDOW);
 133         sp_priv->h_wave_out = hwo;
 134
 135         return (result==MMSYSERR_NOERROR);
 136 }
 137
 138 static int wave_out(struct speech_priv* sp_priv)
 139 {
 140         unsigned char wav_outbuf[SAMPLES_PER_BUFFER * 2];
 141         int isDone;
 142
 143         WAVEHDR *WaveHeader = g_list_first(sp_priv->free_buffers)->data;
 144         sp_priv->free_buffers = g_list_remove(sp_priv->free_buffers, WaveHeader);
 145
 146         out_ptr = out_start = wav_outbuf;
 147         out_end = wav_outbuf + sizeof(wav_outbuf);
 148
 149         isDone = WavegenFill(0);
 150
 151         if ( out_ptr < out_end )
 152         {
 153                 memset ( out_ptr, 0, out_end - out_ptr );
 154         }
 155         memcpy(WaveHeader->lpData, wav_outbuf, WaveHeader->dwBufferLength);
 156         waveOutWrite(sp_priv->h_wave_out, WaveHeader, sizeof(WAVEHDR));
 157
 158         return isDone;
 159 }
 160
 161 static BOOL initialise(void)
 162 {
 163         int param;
 164         int result;
 165
 166         WavegenInit(22050,0);   // 22050
 167         if((result = LoadPhData()) != 1)
 168         {
 169                 if(result == -1)
 170                 {
 171                         dbg(0, "Failed to load espeak-data\n");
 172                         return FALSE;
 173                 }
 174                 else
 175                         dbg(0, "Wrong version of espeak-data 0x%x (expects 0x%x) at %s\n",result,version_phdata,path_home);
 176         }
 177         LoadConfig();
 178         SetVoiceStack(NULL);
 179         SynthesizeInit();
 180
 181         for(param=0; param<N_SPEECH_PARAM; param++)
 182                 param_stack[0].parameter[param] = param_defaults[param];
 183
 184         return TRUE;
 185 }
 186
 187 static void fill_buffer(struct speech_priv *this)
 188 {
 189         while ( this->free_buffers && this->state != state_speaking_phase_3 )
 190         {
 191                 if(Generate(phoneme_list,&n_phoneme_list,1)==0)
 192                 {
 193                         if (!SpeakNextClause(NULL,NULL,1))
 194                         {
 195                                 this->state = state_speaking_phase_2;
 196                         }
 197                 }
 198
 199                 if ( wave_out(this)!= 0 && this->state == state_speaking_phase_2)
 200                 {
 201                         this->state = state_speaking_phase_3;
 202                 }
 203         }
 204 }
 205
 206 static void start_speaking(struct speech_priv* sp_priv)
 207 {
 208         char *phrase = g_list_first(sp_priv->phrases)->data;
 209
 210         sp_priv->state = state_speaking_phase_1;
 211
 212         SpeakNextClause(NULL, phrase,0);
 213         wave_out(sp_priv);
 214         fill_buffer(sp_priv);
 215 }
 216
 217 static LRESULT CALLBACK speech_message_handler( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam )
 218 {
 219         dbg(1, "message_handler called\n");
 220
 221         switch (uMsg)
 222         {
 223                 case msg_say:
 224                 {
 225                         struct speech_priv* sp_priv = (struct speech_priv*)wParam;
 226                         sp_priv->phrases = g_list_append(sp_priv->phrases, (char*)lParam);
 227
 228                         if ( sp_priv->state == state_available )
 229                         {
 230                                 start_speaking(sp_priv);
 231                         }
 232
 233                 }
 234                 break;
 235                 case MM_WOM_DONE:
 236                 {
 237                         dbg(2, "Wave buffer done\n");
 238                         WAVEHDR *WaveHeader = (WAVEHDR *)lParam;
 239
 240                         struct speech_priv* sp_priv = (struct speech_priv*)WaveHeader->dwUser;
 241                         sp_priv->free_buffers = g_list_append(sp_priv->free_buffers, WaveHeader);
 242
 243                         if ( sp_priv->state != state_speaking_phase_3)
 244                         {
 245                                 fill_buffer(sp_priv);
 246                         }
 247                         else if ( g_list_length(sp_priv->free_buffers) == BUFFERS && sp_priv->state == state_speaking_phase_3 )
 248                         {
 249                                 // remove the spoken phrase from the list
 250                                 char *phrase = g_list_first(sp_priv->phrases)->data;
 251                                 g_free( phrase );
 252                                 sp_priv->phrases = g_list_remove(sp_priv->phrases, phrase);
 253
 254                                 if ( sp_priv->phrases )
 255                                 {
 256                                         start_speaking(sp_priv);
 257                                 }
 258                                 else
 259                                 {
 260                                         sp_priv->state = state_available;
 261                                 }
 262                         }
 263                 }
 264                 break;
 265                 case msg_exit:
 266                         ExitThread(0);
 267                         break;
 268
 269                 default:
 270                         break;
 271
 272         }
 273
 274         return TRUE;
 275 }
 276
 277 static void speech_message_dispatcher( struct speech_priv * sp_priv)
 278 {
 279         BOOL bRet;
 280         MSG msg;
 281
 282     while( (bRet = GetMessage( &msg, NULL, 0, 0 )) != 0)
 283     {
 284         if (bRet == -1)
 285         {
 286             dbg(0, "Error getting message from queue\n");
 287             break;
 288         }
 289         else
 290         {
 291             TranslateMessage(&msg);
 292             DispatchMessage(&msg);
 293         }
 294     }
 295 }
 296
 297 static void create_buffers(struct speech_priv *sp_priv)
 298 {
 299         int buffer_counter;
 300         for (buffer_counter = 0; buffer_counter < BUFFERS; buffer_counter++)
 301         {
 302                 WAVEHDR *WaveHeader = g_new0(WAVEHDR, 1);
 303
 304                 WaveHeader->dwBufferLength = SAMPLES_PER_BUFFER * 2;
 305                 WaveHeader->lpData = (char *)VirtualAlloc(0, WaveHeader->dwBufferLength, MEM_COMMIT, PAGE_READWRITE);
 306                 WaveHeader->dwUser = (DWORD)sp_priv;
 307                 waveOutPrepareHeader(sp_priv->h_wave_out, WaveHeader, sizeof(WAVEHDR));
 308
 309                 sp_priv->free_buffers = g_list_append( sp_priv->free_buffers,  WaveHeader );
 310         }
 311 }
 312
 313 static DWORD startThread( LPVOID sp_priv)
 314 {
 315         struct speech_priv *this = (struct speech_priv *) sp_priv;
 316         // Create message queue
 317         TCHAR *g_szClassName  = TEXT("SpeechQueue");
 318     WNDCLASS wc;
 319     HWND hwnd;
 320
 321
 322         memset(&wc, 0 , sizeof(WNDCLASS));
 323     wc.lpfnWndProc      = speech_message_handler;
 324     wc.hInstance        = GetModuleHandle(NULL);
 325     wc.lpszClassName = g_szClassName;
 326
 327     if (!RegisterClass(&wc))
 328     {
 329         dbg(0, "Window registration for message queue failed\n");
 330         return 1;
 331     }
 332
 333     HWND hWndParent = NULL;
 334 #ifndef HAVE_API_WIN32_CE
 335     hWndParent = HWND_MESSAGE;
 336 #endif
 337
 338     // create a message only window
 339     hwnd = CreateWindow(
 340                                 g_szClassName,
 341                                 TEXT("Navit"),
 342                                 0,
 343                                 0,
 344                                 0,
 345                                 0,
 346                                 0,
 347                                 hWndParent,
 348                                 NULL,
 349                                 GetModuleHandle(NULL),
 350                                 NULL);
 351
 352     if (hwnd == NULL)
 353     {
 354         dbg(0, "Window creation failed: %d\n",  GetLastError());
 355         return 1;
 356     }
 357
 358     this->h_queue = hwnd;
 359         this->phrases = NULL;
 360         this->state = state_available;
 361
 362         if(!waveout_open(this))
 363         {
 364                 dbg(0, "Can't open wave output\n");
 365                 return 1;
 366         }
 367
 368         this->free_buffers = NULL;
 369         create_buffers(this);
 370
 371         speech_message_dispatcher(this);
 372
 373     return 0;
 374 }
 375
 376 static int
 377 espeak_say(struct speech_priv *this, const char *text)
 378 {
 379         dbg(1, "Speak: '%s'\n", text);
 380         char *phrase = g_strdup(text);
 381
 382         if (!PostMessage(this->h_queue, msg_say, (WPARAM)this, (LPARAM)phrase))
 383         {
 384                 dbg(0, "PostThreadMessage 'say' failed\n");
 385         }
 386
 387         return 0;
 388 }
 389
 390 static void free_list(gpointer pointer, gpointer this )
 391 {
 392         if ( this )
 393         {
 394                 struct speech_priv *sp_priv = (struct speech_priv *)this;
 395                 WAVEHDR *WaveHeader = (WAVEHDR *)pointer;
 396
 397                 waveOutUnprepareHeader(sp_priv->h_wave_out, WaveHeader, sizeof(WAVEHDR));
 398                 VirtualFree(WaveHeader->lpData, WaveHeader->dwBufferLength, MEM_DECOMMIT);
 399         }
 400         g_free(pointer);
 401 }
 402
 403 static void
 404 espeak_destroy(struct speech_priv *this)
 405 {
 406         g_list_foreach( this->free_buffers, free_list, (gpointer)this );
 407         g_list_free( this->free_buffers );
 408
 409         g_list_foreach( this->phrases, free_list, 0 );
 410         g_list_free(this->phrases);
 411
 412         waveout_close(this);
 413         g_free(this);
 414 }
 415
 416 static struct speech_methods espeak_meth = {
 417         espeak_destroy,
 418         espeak_say,
 419 };
 420
 421 static struct speech_priv *
 422 espeak_new(struct speech_methods *meth, struct attr **attrs) {
 423         struct speech_priv *this = NULL;
 424         struct attr *path;
 425         struct attr *language;
 426         char *lang_str=NULL;
 427
 428         path=attr_search(attrs, NULL, attr_path);
 429         if (path)
 430                 strcpy(path_home,path->u.str);
 431         else
 432                 sprintf(path_home,"%s/espeak-data",getenv("NAVIT_SHAREDIR"));
 433         dbg(0,"path_home set to %s\n",path_home);
 434
 435         if ( !initialise() )
 436         {
 437                 return NULL;
 438         }
 439
 440         language=attr_search(attrs, NULL, attr_language);
 441         if ( language ) {
 442                 lang_str=g_strdup(language->u.str);
 443         } else {
 444                 char *lang_env=getenv("LANG");
 445
 446                 if (lang_env) {
 447                         char *country,*lang,*lang_full;
 448                         char *file1;
 449                         char *file2;
 450                         lang_full=g_strdup(lang_env);
 451                         strtolower(lang_full,lang_env);
 452                         lang=g_strdup(lang_full);
 453                         country=strchr(lang_full,'_');
 454                         if (country) {
 455                                 lang[country-lang_full]='\0';
 456                                 *country++='-';
 457                         }
 458                         file1=g_strdup_printf("%s/voices/%s",path_home,lang_full);
 459                         file2=g_strdup_printf("%s/voices/%s/%s",path_home,lang,lang_full);
 460                         dbg(0,"Testing %s and %s\n",file1,file2);
 461                         if (file_exists(file1) || file_exists(file2))
 462                                 lang_str=g_strdup(lang_full);
 463                         else
 464                                 lang_str=g_strdup(lang);
 465                         dbg(0,"Language full %s lang %s result %s\n",lang_full,lang,lang_str);
 466                         g_free(lang_full);
 467                         g_free(lang);
 468                         g_free(file1);
 469                         g_free(file2);
 470                 }
 471         }
 472         if(lang_str && SetVoiceByName(lang_str) != EE_OK)
 473         {
 474                 dbg(0, "Error setting language to: '%s',falling back to default\n", lang_str);
 475                 g_free(lang_str);
 476                 lang_str=NULL;
 477         }
 478         if(!lang_str && SetVoiceByName("default") != EE_OK) {
 479                 dbg(0, "Error setting language to default\n");
 480         }
 481
 482
 483         SetParameter(espeakRATE,170,0);
 484         SetParameter(espeakVOLUME,100,0);
 485         SetParameter(espeakCAPITALS,option_capitals,0);
 486         SetParameter(espeakPUNCTUATION,option_punctuation,0);
 487         SetParameter(espeakWORDGAP,0,0);
 488
 489 //      if(pitch_adjustment != 50)
 490 //      {
 491 //              SetParameter(espeakPITCH,pitch_adjustment,0);
 492 //      }
 493         DoVoiceChange(voice);
 494
 495         this=g_new(struct speech_priv,1);
 496         this->h_message_thread = CreateThread( NULL, 0, (LPTHREAD_START_ROUTINE)startThread, (PVOID)this, 0, NULL);
 497
 498         *meth=espeak_meth;
 499
 500
 501         return this;
 502 }
 503
 504 void
 505 plugin_init(void)
 506 {
 507         plugin_register_speech_type("espeak", espeak_new);
 508 }