//Copyright 2022 Ruel Tmeizeh - All Rights Reserved
package main

import (
	"bufio"
	"context"
	"encoding/binary"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"os"

	texttospeech "cloud.google.com/go/texttospeech/apiv1"
	texttospeechpb "google.golang.org/genproto/googleapis/cloud/texttospeech/v1"
)

// CommandlineOptions groups the values of the program's command-line flags.
//
// Every field is a pointer because main fills the struct with the pointers
// returned by the flag package constructors (flag.Bool, flag.String,
// flag.Float64, flag.Int); the pointed-to values only reflect the user's
// arguments once flag.Parse has run.
//
// Field meanings, as described by the flag usage strings in main:
//   - ListVoices: list available voices instead of generating speech.
//   - Ssml:       treat the input as SSML rather than plain text.
//   - Output:     output file path ("-" selects stdout).
//   - Input:      input file path ("-" selects stdin).
//   - Language:   language code such as "en-US".
//   - Gender:     voice gender selector ("m", "f" or "n").
//   - Voice:      explicit voice name; overrides language and gender.
//   - Format:     audio format name (pcm, opus, mp3, ulaw, alaw).
//   - Speed:      speaking rate, 1.0 being normal.
//   - Pitch:      pitch adjustment, 0.0 being normal.
//   - SampleRate: sample rate in Hz.
//   - VolumeGain: volume gain in dB.
//
// NOTE(review): nothing visible in this file marshals the struct to JSON;
// confirm whether the json tags are still needed.
type CommandlineOptions struct {
	ListVoices *bool    `json:"listvoices,omitempty"`
	Ssml       *bool    `json:"ssml,omitempty"`
	Output     *string  `json:"output,omitempty"`
	Input      *string  `json:"input,omitempty"`
	Language   *string  `json:"language,omitempty"`
	Gender     *string  `json:"gender,omitempty"`
	Voice      *string  `json:"voice,omitempty"`
	Format     *string  `json:"format,omitempty"`
	Speed      *float64 `json:"speed,omitempty"`
	Pitch      *float64 `json:"pitch,omitempty"`
	SampleRate *int     `json:"samplerate,omitempty"`
	VolumeGain *float64 `json:"volume,omitempty"`
}

// main parses the command-line flags and then either lists the available
// voices (-listvoices) or sends the input text/SSML to the Text-to-Speech
// service and writes the returned audio to a file or to stdout.
//
// Any failure (client creation, reading the input, the RPC itself, or writing
// the output) is reported through log.Fatal, which exits with a non-zero
// status.
func main() {
	//check commandline args:
	opts := &CommandlineOptions{
		ListVoices: flag.Bool("listvoices", false, "List available voices, rather than generate TTS. Use in\ncombination with '-l ALL' to show voices from all languages."),
		Ssml:       flag.Bool("ssml", false, "Input is SSML format, rather than plain text."),
		Input:      flag.String("i", "-", "Input file path. Defaults to stdin.\n"),
		Output:     flag.String("o", "./tts.mp3", "Output file path. Use '-' for stdout.\n"),
		Language:   flag.String("l", "en-US", "Language selection. 'en-US', 'en-GB', 'en-AU', 'en-IN',\n'el-GR', 'ru-RU', etc.\n"),
		Gender:     flag.String("g", "m", "Gender selection. [m,f,n] 'n' means neutral/don't care.\n"),
		Format:     flag.String("f", "mp3", "Audio format selection. PCM is uncompressed best quality. Opus is\nexcellent quality. MP3 is 32kb bitrate. [pcm,opus,mp3,ulaw,alaw]\n"),
		Voice:      flag.String("v", "unspecified", "Voice. If specified, this overrides language & gender.\n"),
		Speed:      flag.Float64("s", 1.0, "Speed. E.g. '1.0' is normal. '2.0' is double\nspeed, '0.25' is quarter speed, etc.\n"),
		Pitch:      flag.Float64("p", 0.0, "Pitch. E.g. '0.0' is normal. '20.0' is highest,\n'-20.0' is lowest.\n (default 0)"),
		SampleRate: flag.Int("r", 24000, "Samplerate in Hz. [8000,11025,16000,22050,24000,32000,44100,48000]\n"),
		// The flag name must not start with '-': the flag package rejects such
		// names, so the option is registered as "db" and used as -db.
		VolumeGain: flag.Float64("db", 0.0, "Volume gain in dB. [-96 to 16]\n (default 0)"),
	}
	flag.Parse()

	//Map the requested format onto an API encoding and a default file
	//extension. Unrecognized formats deliberately fall back to MP3.
	var audioFormat texttospeechpb.AudioEncoding
	var fileExtension string
	switch *opts.Format {
	case "opus", "ogg":
		audioFormat = texttospeechpb.AudioEncoding_OGG_OPUS
		fileExtension = "ogg"
	case "pcm":
		audioFormat = texttospeechpb.AudioEncoding_LINEAR16
		fileExtension = "pcm"
	case "ulaw":
		audioFormat = texttospeechpb.AudioEncoding_MULAW
		fileExtension = "ulaw"
	case "alaw":
		audioFormat = texttospeechpb.AudioEncoding_ALAW
		fileExtension = "alaw"
	default: //"mp3" and anything unrecognized
		audioFormat = texttospeechpb.AudioEncoding_MP3
		fileExtension = "mp3"
	}

	//When -o is left at its default, name the file after the chosen format;
	//otherwise honour the user's path as given.
	filename := "tts." + fileExtension
	if *opts.Output != "./tts.mp3" {
		filename = *opts.Output
	}

	///////////////////////////////////////
	//Instantiates a Google Cloud client
	ctx := context.Background()
	client, err := texttospeech.NewClient(ctx)
	if err != nil {
		log.Fatal(err)
	}
	defer client.Close()

	if *opts.ListVoices {
		fmt.Println("Available Voices:")
		bufStdout := bufio.NewWriter(os.Stdout)
		if err := listVoices(bufStdout, ctx, client, *opts.Language); err != nil {
			log.Fatal(err)
		}
		if err := bufStdout.Flush(); err != nil {
			log.Fatal(err)
		}
		//return (rather than os.Exit) so the deferred client.Close runs
		return
	}

	input, err := readInput(*opts.Input)
	if err != nil {
		log.Fatal(err)
	}

	//Start building TTS request things
	synthInput := &texttospeechpb.SynthesisInput{}
	if *opts.Ssml {
		synthInput.InputSource = &texttospeechpb.SynthesisInput_Ssml{Ssml: input}
	} else {
		synthInput.InputSource = &texttospeechpb.SynthesisInput_Text{Text: input}
	}

	//Voice Gender
	var gender texttospeechpb.SsmlVoiceGender
	switch *opts.Gender {
	case "m":
		gender = texttospeechpb.SsmlVoiceGender_MALE
	case "f":
		gender = texttospeechpb.SsmlVoiceGender_FEMALE
	default:
		gender = texttospeechpb.SsmlVoiceGender_NEUTRAL
	}

	voice := &texttospeechpb.VoiceSelectionParams{
		LanguageCode: *opts.Language,
		SsmlGender:   gender,
	}
	//Only set Name when the user asked for a specific voice.
	if *opts.Voice != "unspecified" {
		voice.Name = *opts.Voice
	}

	//the request parameters
	req := texttospeechpb.SynthesizeSpeechRequest{
		Input: synthInput,
		Voice: voice,
		AudioConfig: &texttospeechpb.AudioConfig{
			AudioEncoding:   audioFormat,
			SpeakingRate:    *opts.Speed,
			SampleRateHertz: int32(*opts.SampleRate),
			Pitch:           *opts.Pitch,
			VolumeGainDb:    *opts.VolumeGain,
		},
	}

	resp, err := client.SynthesizeSpeech(ctx, &req)
	if err != nil {
		log.Fatal(err)
	}

	if *opts.Output == "-" { //write to stdout
		bufStdout := bufio.NewWriter(os.Stdout) //add a buffer
		//AudioContent is a byte slice, so binary.Write emits it verbatim and
		//the byte-order argument has no effect on the output.
		if err := binary.Write(bufStdout, binary.LittleEndian, resp.AudioContent); err != nil {
			log.Fatal(err)
		}
		if err := bufStdout.Flush(); err != nil {
			log.Fatal(err)
		}
		return
	}

	//write to file
	if err := ioutil.WriteFile(filename, resp.AudioContent, 0644); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("Audio content written to file: %v\n", filename)
}

// readInput returns the complete contents of the file at path, or of stdin
// when path is "-". The data is returned unmodified, so line breaks in the
// input are preserved and there is no per-line length limit.
func readInput(path string) (string, error) {
	src := os.Stdin
	if path != "-" {
		f, err := os.Open(path)
		if err != nil {
			return "", err
		}
		defer f.Close()
		src = f
	}

	data, err := ioutil.ReadAll(src)
	if err != nil {
		return "", err
	}
	return string(data), nil
}

// listVoices requests the voice list through client and writes a
// human-readable entry to w for each (voice, language code) pair whose
// language code equals lang. Passing "ALL" as lang disables the filter.
// On success a closing separator line is written after the entries.
//
// The only error returned is the one from the ListVoices call; the results of
// the writes to w are not checked.
func listVoices(w io.Writer, ctx context.Context, client *texttospeech.Client, lang string) error {
	request := &texttospeechpb.ListVoicesRequest{}
	catalogue, err := client.ListVoices(ctx, request)
	if err != nil {
		return err
	}

	showAll := lang == "ALL"
	for _, v := range catalogue.Voices {
		for _, code := range v.LanguageCodes {
			// Skip language codes the caller did not ask for.
			if !showAll && code != lang {
				continue
			}
			fmt.Fprintln(w, "___________________________________")
			fmt.Fprintf(w, "Name: %v\n", v.Name)
			fmt.Fprintf(w, "  Language: %v\n", code)
			fmt.Fprintf(w, "  Gender: %v\n", v.SsmlGender.String())
			fmt.Fprintf(w, "  Native Sample Rate (in Hz): %v\n", v.NaturalSampleRateHertz)
		}
	}
	fmt.Fprintln(w, "------------------------------------")

	return nil
}