package main import ( "bufio" "context" "encoding/binary" "flag" "fmt" "io/ioutil" "log" "os" texttospeech "cloud.google.com/go/texttospeech/apiv1" texttospeechpb "google.golang.org/genproto/googleapis/cloud/texttospeech/v1" ) type CommandlineOptions struct { Ssml *bool `json:"ssml,omitempty"` Output *string `json:"output,omitempty"` Language *string `json:"lang,omitempty"` Gender *string `json:"gender,omitempty"` Voice *string `json:"voice,omitempty"` Format *string `json:"format,omitempty"` Speed *float64 `json:"speed,omitempty"` Pitch *float64 `json:"pitch,omitempty"` SampleRate *int `json:"samplerate,omitempty"` VolumeGain *float64 `json:"volume,omitempty"` } func main() { //check commandline args: opts := &CommandlineOptions{ Ssml: flag.Bool("ssml", false, "Input is SSML format, rather than plain text."), Output: flag.String("o", "./tts.mp3", "Output file path. Use '-' for stdout."), Language: flag.String("l", "en-US", "Language selection. 'en-US', 'en-GB', 'en-AU', 'en-IN', 'el-GR', 'ru-RU', etc."), Gender: flag.String("g", "m", "Gender selection. [m,f,n]"), Format: flag.String("f", "mp3", "Format selection. [mp3,opus,pcm,ulaw,alaw]"), Voice: flag.String("v", "unspecified", "Voice. If specified, this overrides language & gender."), Speed: flag.Float64("s", 1.0, "Speed. E.g. '1.0' is normal. '2.0' is double speed, '0.25' is quarter speed, etc."), Pitch: flag.Float64("p", 1.0, "Pitch. E.g. '0.0' is normal. '20.0' is highest, '-20.0' is lowest."), SampleRate: flag.Int("r", 32000, "Samplerate. [8000,11025,16000,22050,24000,32000,44100,48000]"), VolumeGain: flag.Float64("db", 0.0, "Volume gain in dB."), } flag.Parse() var audioFormat texttospeechpb.AudioEncoding var fileExtension string switch *opts.Format { case "mp3": audioFormat = texttospeechpb.AudioEncoding_MP3 fileExtension = "mp3" case "opus": audioFormat = texttospeechpb.AudioEncoding_OGG_OPUS fileExtension = "ogg" case "ogg": audioFormat = texttospeechpb.AudioEncoding_OGG_OPUS fileExtension = "ogg" case "pcm": audioFormat = texttospeechpb.AudioEncoding_LINEAR16 fileExtension = "pcm" case "ulaw": audioFormat = texttospeechpb.AudioEncoding_MULAW fileExtension = "ulaw" case "alaw": audioFormat = texttospeechpb.AudioEncoding_ALAW fileExtension = "alaw" default: audioFormat = texttospeechpb.AudioEncoding_MP3 fileExtension = "mp3" } filename := "tts." + fileExtension if *opts.Output != "./tts.mp3" { filename = *opts.Output } //Instantiates a Google Cloud client ctx := context.Background() client, err := texttospeech.NewClient(ctx) if err != nil { log.Fatal(err) } defer client.Close() //take input from stdin stdinReader := bufio.NewReader(os.Stdin) input, _ := stdinReader.ReadString('\n') synthInput := &texttospeechpb.SynthesisInput{} synthInput.InputSource = &texttospeechpb.SynthesisInput_Text{Text: input} if *opts.Ssml { synthInput.InputSource = &texttospeechpb.SynthesisInput_Ssml{Ssml: input} } //Voice Gender var gender texttospeechpb.SsmlVoiceGender switch *opts.Gender { case "m": gender = texttospeechpb.SsmlVoiceGender_MALE case "f": gender = texttospeechpb.SsmlVoiceGender_FEMALE default: gender = texttospeechpb.SsmlVoiceGender_NEUTRAL } voice := &texttospeechpb.VoiceSelectionParams{ LanguageCode: *opts.Language, SsmlGender: gender, //Name: *opts.Voice, //Name overrides LanguageCode and SsmlGender //Name: "en-US-Wavenet-B", } if *opts.Voice != "unspecified" { voice.Name = *opts.Voice } //the request parameters req := texttospeechpb.SynthesizeSpeechRequest{ Input: synthInput, Voice: voice, AudioConfig: &texttospeechpb.AudioConfig{ AudioEncoding: audioFormat, SpeakingRate: *opts.Speed, SampleRateHertz: int32(*opts.SampleRate), Pitch: *opts.Pitch, VolumeGainDb: *opts.VolumeGain, }, } resp, err := client.SynthesizeSpeech(ctx, &req) if err != nil { log.Fatal(err) } if *opts.Output == "-" { //write to stdout //binary.Write(os.Stdout, binary.LittleEndian, resp.AudioContent) bufStdout := bufio.NewWriter(os.Stdout) //add a buffer defer bufStdout.Flush() binary.Write(bufStdout, binary.LittleEndian, resp.AudioContent) } else { //write to file err = ioutil.WriteFile(filename, resp.AudioContent, 0644) if err != nil { log.Fatal(err) } fmt.Printf("Audio content written to file: %v\n", filename) } }