// ruhnet/google-squawk


								package main


								import (

									"bufio"

									"context"

									"encoding/binary"

									"flag"

									"fmt"

									"io/ioutil"

									"log"

									"os"


									texttospeech "cloud.google.com/go/texttospeech/apiv1"

									texttospeechpb "google.golang.org/genproto/googleapis/cloud/texttospeech/v1"

								)


								// CommandlineOptions holds a pointer to the parsed value of every
								// command-line flag the program accepts. Each field is a pointer because
								// that is what the flag package's constructors (flag.Bool, flag.String, ...)
								// return; the values are only meaningful after flag.Parse has run.
								type CommandlineOptions struct {
									// Input and output handling.
									Ssml   *bool   `json:"ssml,omitempty"`   // treat the input as SSML rather than plain text
									Output *string `json:"output,omitempty"` // output file path; "-" selects stdout

									// Voice selection.
									Language *string `json:"lang,omitempty"`   // language code such as "en-US"
									Gender   *string `json:"gender,omitempty"` // "m", "f" or "n"
									Voice    *string `json:"voice,omitempty"`  // explicit voice name

									// Audio rendering.
									Format     *string  `json:"format,omitempty"`     // mp3, opus, pcm, ulaw or alaw
									Speed      *float64 `json:"speed,omitempty"`      // speaking rate; 1.0 is normal
									Pitch      *float64 `json:"pitch,omitempty"`      // pitch adjustment
									SampleRate *int     `json:"samplerate,omitempty"` // sample rate in Hz
									VolumeGain *float64 `json:"volume,omitempty"`     // volume gain in dB
								}


								func main() {

									//check commandline args:

									opts := &CommandlineOptions{

										Ssml:       flag.Bool("ssml", false, "Input is SSML format, rather than plain text."),

										Output:     flag.String("o", "./tts.mp3", "Output file path. Use '-' for stdout."),

										Language:   flag.String("l", "en-US", "Language selection. 'en-US', 'en-GB', 'en-AU', 'en-IN', 'el-GR', 'ru-RU', etc."),

										Gender:     flag.String("g", "m", "Gender selection. [m,f,n]"),

										Format:     flag.String("f", "mp3", "Format selection. [mp3,opus,pcm,ulaw,alaw]"),

										Voice:      flag.String("v", "unspecified", "Voice. If specified, this overrides language & gender."),

										Speed:      flag.Float64("s", 1.0, "Speed. E.g. '1.0' is normal. '2.0' is double speed, '0.25' is quarter speed, etc."),

										Pitch:      flag.Float64("p", 1.0, "Pitch. E.g. '0.0' is normal. '20.0' is highest, '-20.0' is lowest."),

										SampleRate: flag.Int("r", 32000, "Samplerate. [8000,11025,16000,22050,24000,32000,44100,48000]"),

										VolumeGain: flag.Float64("db", 0.0, "Volume gain in dB."),

									}

									flag.Parse()


									var audioFormat texttospeechpb.AudioEncoding

									var fileExtension string

									switch *opts.Format {

									case "mp3":

										audioFormat = texttospeechpb.AudioEncoding_MP3

										fileExtension = "mp3"

									case "opus":

										audioFormat = texttospeechpb.AudioEncoding_OGG_OPUS

										fileExtension = "ogg"

									case "ogg":

										audioFormat = texttospeechpb.AudioEncoding_OGG_OPUS

										fileExtension = "ogg"

									case "pcm":

										audioFormat = texttospeechpb.AudioEncoding_LINEAR16

										fileExtension = "pcm"

									case "ulaw":

										audioFormat = texttospeechpb.AudioEncoding_MULAW

										fileExtension = "ulaw"

									case "alaw":

										audioFormat = texttospeechpb.AudioEncoding_ALAW

										fileExtension = "alaw"

									default:

										audioFormat = texttospeechpb.AudioEncoding_MP3

										fileExtension = "mp3"

									}


									filename := "tts." + fileExtension

									if *opts.Output != "./tts.mp3" {

										filename = *opts.Output

									}


									//Instantiates a Google Cloud client

									ctx := context.Background()

									client, err := texttospeech.NewClient(ctx)

									if err != nil {

										log.Fatal(err)

									}

									defer client.Close()


									//take input from stdin

									stdinReader := bufio.NewReader(os.Stdin)

									input, _ := stdinReader.ReadString('\n')


									synthInput := &texttospeechpb.SynthesisInput{}

									synthInput.InputSource = &texttospeechpb.SynthesisInput_Text{Text: input}

									if *opts.Ssml {

										synthInput.InputSource = &texttospeechpb.SynthesisInput_Ssml{Ssml: input}

									}


									//Voice Gender

									var gender texttospeechpb.SsmlVoiceGender

									switch *opts.Gender {

									case "m":

										gender = texttospeechpb.SsmlVoiceGender_MALE

									case "f":

										gender = texttospeechpb.SsmlVoiceGender_FEMALE

									default:

										gender = texttospeechpb.SsmlVoiceGender_NEUTRAL

									}


									voice := &texttospeechpb.VoiceSelectionParams{

										LanguageCode: *opts.Language,

										SsmlGender:   gender,

										//Name:         *opts.Voice, //Name overrides LanguageCode and SsmlGender

										//Name: "en-US-Wavenet-B",

									}

									if *opts.Voice != "unspecified" {

										voice.Name = *opts.Voice

									}


									//the request parameters

									req := texttospeechpb.SynthesizeSpeechRequest{

										Input: synthInput,

										Voice: voice,

										AudioConfig: &texttospeechpb.AudioConfig{

											AudioEncoding:   audioFormat,

											SpeakingRate:    *opts.Speed,

											SampleRateHertz: int32(*opts.SampleRate),

											Pitch:           *opts.Pitch,

											VolumeGainDb:    *opts.VolumeGain,

										},

									}


									resp, err := client.SynthesizeSpeech(ctx, &req)

									if err != nil {

										log.Fatal(err)

									}


									if *opts.Output == "-" { //write to stdout

										//binary.Write(os.Stdout, binary.LittleEndian, resp.AudioContent)

										bufStdout := bufio.NewWriter(os.Stdout) //add a buffer

										defer bufStdout.Flush()

										binary.Write(bufStdout, binary.LittleEndian, resp.AudioContent)

									} else { //write to file

										err = ioutil.WriteFile(filename, resp.AudioContent, 0644)

										if err != nil {

											log.Fatal(err)

										}

										fmt.Printf("Audio content written to file: %v\n", filename)

									}


								}