You've heard of Google Chat, Google Voice, Google Talk? Well, this is Google Squawk! :-D It's a command-line application that connects to the Google Cloud TTS API and generates audio from text.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

147 lines
4.4 KiB

package main
import (
"bufio"
"context"
"encoding/binary"
"flag"
"fmt"
"io/ioutil"
"log"
"os"
texttospeech "cloud.google.com/go/texttospeech/apiv1"
texttospeechpb "google.golang.org/genproto/googleapis/cloud/texttospeech/v1"
)
// CommandlineOptions bundles the values of the program's command-line flags.
// Every field is a pointer because it stores the pointer handed back by the
// flag package when the corresponding flag is registered; the pointed-to
// values are only meaningful after flag.Parse has run.
type CommandlineOptions struct {
	// Ssml selects SSML input instead of plain text.
	Ssml *bool `json:"ssml,omitempty"`

	// Output is the destination file path; "-" means stdout.
	Output *string `json:"output,omitempty"`

	// Language is the language code of the requested voice, e.g. "en-US".
	Language *string `json:"lang,omitempty"`

	// Gender is the requested voice gender: "m", "f" or "n".
	Gender *string `json:"gender,omitempty"`

	// Voice is an explicit voice name.
	Voice *string `json:"voice,omitempty"`

	// Format names the audio encoding of the result, e.g. "mp3".
	Format *string `json:"format,omitempty"`

	// Speed is the speaking rate sent with the request.
	Speed *float64 `json:"speed,omitempty"`

	// Pitch is the pitch adjustment sent with the request.
	Pitch *float64 `json:"pitch,omitempty"`

	// SampleRate is the sample rate of the result, in hertz.
	SampleRate *int `json:"samplerate,omitempty"`

	// VolumeGain is the volume gain sent with the request, in dB.
	VolumeGain *float64 `json:"volume,omitempty"`
}
// main registers and parses the command-line flags, then performs the
// synthesis. Any failure is logged and the process exits with a non-zero
// status.
func main() {
	// The pitch default is 0.0 so that it agrees with the flag's own help
	// text ("'0.0' is normal"); a non-zero default would shift the pitch of
	// every request that does not pass -p.
	opts := &CommandlineOptions{
		Ssml:       flag.Bool("ssml", false, "Input is SSML format, rather than plain text."),
		Output:     flag.String("o", defaultOutputPath, "Output file path. Use '-' for stdout."),
		Language:   flag.String("l", "en-US", "Language selection. 'en-US', 'en-GB', 'en-AU', 'en-IN', 'el-GR', 'ru-RU', etc."),
		Gender:     flag.String("g", "m", "Gender selection. [m,f,n]"),
		Format:     flag.String("f", "mp3", "Format selection. [mp3,opus,pcm,ulaw,alaw]"),
		Voice:      flag.String("v", "unspecified", "Voice. If specified, this overrides language & gender."),
		Speed:      flag.Float64("s", 1.0, "Speed. E.g. '1.0' is normal. '2.0' is double speed, '0.25' is quarter speed, etc."),
		Pitch:      flag.Float64("p", 0.0, "Pitch. E.g. '0.0' is normal. '20.0' is highest, '-20.0' is lowest."),
		SampleRate: flag.Int("r", 32000, "Samplerate. [8000,11025,16000,22050,24000,32000,44100,48000]"),
		VolumeGain: flag.Float64("db", 0.0, "Volume gain in dB."),
	}
	flag.Parse()

	// The work lives in synthesize so that its deferred client.Close() runs
	// before log.Fatal ends the process (log.Fatal skips deferred calls).
	if err := synthesize(opts); err != nil {
		log.Fatal(err)
	}
}

// defaultOutputPath is the default value of the -o flag. An -o value equal to
// it is treated as "not specified", in which case the output file name is
// derived from the selected audio format.
const defaultOutputPath = "./tts.mp3"

// synthesize reads one line of text from stdin, sends it to the Cloud
// Text-to-Speech API using the settings in opts, and writes the returned
// audio either to stdout (when -o is "-") or to a file.
func synthesize(opts *CommandlineOptions) error {
	audioFormat, fileExtension := audioEncodingFor(*opts.Format)

	filename := "tts." + fileExtension
	if *opts.Output != defaultOutputPath {
		filename = *opts.Output
	}

	// Instantiate a Google Cloud client.
	ctx := context.Background()
	client, err := texttospeech.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("creating text-to-speech client: %v", err)
	}
	defer client.Close()

	// Only the first line of stdin is used as input. ReadString hands back
	// whatever it read together with the error, so a final line that lacks a
	// trailing newline is still usable; give up only if nothing was read.
	input, err := bufio.NewReader(os.Stdin).ReadString('\n')
	if err != nil && input == "" {
		return fmt.Errorf("reading input from stdin: %v", err)
	}

	synthInput := &texttospeechpb.SynthesisInput{}
	if *opts.Ssml {
		synthInput.InputSource = &texttospeechpb.SynthesisInput_Ssml{Ssml: input}
	} else {
		synthInput.InputSource = &texttospeechpb.SynthesisInput_Text{Text: input}
	}

	voice := &texttospeechpb.VoiceSelectionParams{
		LanguageCode: *opts.Language,
		SsmlGender:   voiceGenderFor(*opts.Gender),
	}
	// "unspecified" is the -v default and means no explicit voice was named.
	if *opts.Voice != "unspecified" {
		voice.Name = *opts.Voice
	}

	// The request parameters.
	req := &texttospeechpb.SynthesizeSpeechRequest{
		Input: synthInput,
		Voice: voice,
		AudioConfig: &texttospeechpb.AudioConfig{
			AudioEncoding:   audioFormat,
			SpeakingRate:    *opts.Speed,
			SampleRateHertz: int32(*opts.SampleRate),
			Pitch:           *opts.Pitch,
			VolumeGainDb:    *opts.VolumeGain,
		},
	}
	resp, err := client.SynthesizeSpeech(ctx, req)
	if err != nil {
		return fmt.Errorf("synthesizing speech: %v", err)
	}

	if *opts.Output == "-" {
		return writeAudioToStdout(resp.AudioContent)
	}
	if err := ioutil.WriteFile(filename, resp.AudioContent, 0644); err != nil {
		return fmt.Errorf("writing audio file: %v", err)
	}
	fmt.Printf("Audio content written to file: %v\n", filename)
	return nil
}

// audioEncodingFor maps the -f flag value to the API audio encoding and the
// file extension used for the default output name. Unrecognised values fall
// back to MP3.
func audioEncodingFor(format string) (texttospeechpb.AudioEncoding, string) {
	switch format {
	case "opus", "ogg":
		return texttospeechpb.AudioEncoding_OGG_OPUS, "ogg"
	case "pcm":
		return texttospeechpb.AudioEncoding_LINEAR16, "pcm"
	case "ulaw":
		return texttospeechpb.AudioEncoding_MULAW, "ulaw"
	case "alaw":
		return texttospeechpb.AudioEncoding_ALAW, "alaw"
	default: // includes "mp3"
		return texttospeechpb.AudioEncoding_MP3, "mp3"
	}
}

// voiceGenderFor maps the -g flag value to the API voice gender. Anything
// other than "m" or "f" selects the neutral gender.
func voiceGenderFor(code string) texttospeechpb.SsmlVoiceGender {
	switch code {
	case "m":
		return texttospeechpb.SsmlVoiceGender_MALE
	case "f":
		return texttospeechpb.SsmlVoiceGender_FEMALE
	default:
		return texttospeechpb.SsmlVoiceGender_NEUTRAL
	}
}

// writeAudioToStdout writes the raw audio bytes to stdout through a buffer
// and reports any write or flush failure, so a truncated stream is never
// mistaken for success.
func writeAudioToStdout(audio []byte) error {
	out := bufio.NewWriter(os.Stdout)
	// The payload is a byte slice, so the byte order argument does not
	// change what is written.
	if err := binary.Write(out, binary.LittleEndian, audio); err != nil {
		return fmt.Errorf("writing audio to stdout: %v", err)
	}
	if err := out.Flush(); err != nil {
		return fmt.Errorf("flushing audio to stdout: %v", err)
	}
	return nil
}