1
0
mirror of https://github.com/fumiama/ahsai.git synced 2026-06-04 23:30:24 +08:00
This commit is contained in:
fumiama
2022-08-07 22:49:51 +08:00
parent 8d55badf65
commit 08db62a32d
5 changed files with 154 additions and 4 deletions

2
.gitignore vendored
View File

@@ -13,3 +13,5 @@
# Dependency directories (remove the comment below to include it)
# vendor/
test

View File

@@ -51,4 +51,33 @@ func TestAPI(t *testing.T) {
- ついなちゃん標準語
- ついなちゃん関西弁
- 伊織弓鶴
- 音街ウナ
- 音街ウナ
## commandline tool
```bash
go run cmd/main.go -h
Usage:
-a float
anger
-b uint
border slience sample lenth (default 2048)
-d float
sadness
-f string
line-separated text to read
-h display this help
-j float
joy
-n string
specify speaker (default "民安ともえ")
-o string
output wav file path (default "out.wav")
-p float
pitch (default 1)
-r float
range (default 1)
-s float
speed (default 1)
-v float
volume (default 1)
```

5
api.go
View File

@@ -1,3 +1,4 @@
// Package ahsai AH Soft フリーテキスト音声合成 demo API
package ahsai
import (
@@ -35,11 +36,14 @@ var (
}
)
// Speaker bundles the speaker id and the voice parameters that together make
// up one particular setting of the API.
type Speaker struct {
	// id identifies the speaker; it is unexported and set via SetName.
	id uint32

	// Voice parameters, all exported so callers can tune them directly.
	Volume  float32
	Speed   float32
	Pitch   float32
	Range   float32
	Anger   float32
	Sadness float32
	Joy     float32
}
// NewSpeaker returns a Speaker initialized with default parameters.
// Use s.SetName() to select the actual speaker.
func NewSpeaker() (s Speaker) {
s.Volume = 1
s.Speed = 1
@@ -55,6 +59,7 @@ var (
ErrNoSuchSpeaker = errors.New("no such speaker")
)
// SetName sets the speaker id by looking up name.
func (s *Speaker) SetName(name string) error {
id, ok := speakers[name]
if !ok {

81
cmd/main.go Normal file
View File

@@ -0,0 +1,81 @@
package main
import (
"bufio"
"flag"
"fmt"
"os"
"strconv"
"github.com/faiface/beep/wav"
"github.com/fumiama/ahsai"
)
// main is the command-line entry point. It reads the text file given by -f
// line by line, asks the selected speaker to synthesize every line, composes
// the resulting clips into one stream and encodes that stream as a WAV file
// at the path given by -o.
//
// Any failure aborts the program with a panic carrying the cause.
func main() {
	f := flag.String("f", "", "line-separated text to read")
	o := flag.String("o", "out.wav", "output wav file path")
	n := flag.String("n", "民安ともえ", "specify speaker")
	v := flag.Float64("v", 1.0, "volume")
	s := flag.Float64("s", 1.0, "speed")
	p := flag.Float64("p", 1.0, "pitch")
	r := flag.Float64("r", 1.0, "range")
	a := flag.Float64("a", 0, "anger")
	d := flag.Float64("d", 0, "sadness")
	j := flag.Float64("j", 0, "joy")
	b := flag.Uint("b", 2048, "border slience sample lenth")
	h := flag.Bool("h", false, "display this help")
	flag.Parse()
	if *h {
		flag.Usage()
		return
	}
	if *f == "" {
		panic("parameter -f must be specified")
	}

	spk := ahsai.Speaker{
		Volume:  float32(*v),
		Speed:   float32(*s),
		Pitch:   float32(*p),
		Range:   float32(*r),
		Anger:   float32(*a),
		Sadness: float32(*d),
		Joy:     float32(*j),
	}
	if err := spk.SetName(*n); err != nil {
		panic(err)
	}

	// Read all input lines up front so that an over-long line is reported
	// before any synthesis request is made.
	txt, err := os.Open(*f)
	if err != nil {
		panic(err)
	}
	sc := bufio.NewScanner(txt)
	lst := make([]string, 0, 128)
	for sc.Scan() {
		t := sc.Text()
		// The limit is counted in runes, not bytes, so multi-byte text is
		// measured by characters.
		if len([]rune(t)) > 100 {
			panic("line " + strconv.Itoa(len(lst)+1) + ": too long (> 100 chars)")
		}
		lst = append(lst, t)
	}
	// Scan returns false on a read failure as well as on EOF; without this
	// check a failing read would silently truncate the input.
	scanErr := sc.Err()
	err = txt.Close()
	if scanErr != nil {
		panic(scanErr)
	}
	if err != nil {
		panic(err)
	}
	// Nothing to synthesize: stop here instead of composing zero clips.
	if len(lst) == 0 {
		panic("no text lines found in " + *f)
	}

	// Replace every text line in place with the value returned by Speak,
	// which is then handed to ComposeStream as the list of clips to fetch.
	for i, t := range lst {
		u, err := spk.Speak(t)
		if err != nil {
			// Report 1-based line numbers, matching the length check above.
			panic("line " + strconv.Itoa(i+1) + " error: " + err.Error())
		}
		lst[i] = u
		fmt.Print("\rread: ", (i+1)*100/len(lst), " %")
	}

	sm, format, err := ahsai.ComposeStream(*b, func(p int) { fmt.Print("\rcompose: ", p, " %") }, lst...)
	if err != nil {
		panic(err)
	}

	out, err := os.Create(*o)
	if err != nil {
		panic(err)
	}
	err = wav.Encode(out, sm, format)
	// Close explicitly so that a failure while flushing the written file is
	// reported; the encode error, if any, takes precedence.
	if cerr := out.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		panic(err)
	}
	fmt.Println("\rall process succeed")
}

39
ogg.go
View File

@@ -30,6 +30,37 @@ func cutstream(s beep.StreamSeekCloser) {
}
}
// ComposeStream fetches every URL in urls, decodes each response body as
// Ogg Vorbis and concatenates the clips into a single in-memory stream,
// inserting sil samples of silence before each clip.
//
// progress, when non-nil, is called after each clip has been appended with
// the percentage (1-100) of URLs processed so far.
//
// The returned format is the one the internal buffer was created with, i.e.
// the format of the first decoded clip. When urls is empty, an empty streamer
// and the zero Format are returned with a nil error.
//
// NOTE(review): the HTTP status code of each response is not checked, and
// clips whose format differs from the first one are appended as-is.
func ComposeStream(sil uint, progress func(p int), urls ...string) (sm beep.Streamer, format beep.Format, err error) {
	if len(urls) == 0 {
		// No clip means no buffer is ever created; return an empty stream
		// instead of dereferencing a nil buffer below.
		return beep.Silence(0), format, nil
	}

	var buf *beep.Buffer
	for i, u := range urls {
		var resp *http.Response
		resp, err = http.Get(u)
		if err != nil {
			return nil, format, err
		}

		var (
			s beep.StreamSeekCloser
			f beep.Format
		)
		s, f, err = vorbis.Decode(resp.Body)
		if err != nil {
			_ = resp.Body.Close()
			return nil, format, err
		}

		if buf == nil {
			// The buffer stores all samples in the first clip's format, so
			// that is the format reported to the caller.
			buf = beep.NewBuffer(f)
			format = f
		}

		// cutstream is defined elsewhere in this file and is applied to every
		// clip before it is appended.
		cutstream(s)
		buf.Append(beep.Silence(int(sil)))
		buf.Append(s)
		// NOTE(review): presumably closing the decoder also closes resp.Body;
		// confirm against the vorbis package.
		_ = s.Close()

		if progress != nil {
			progress((i + 1) * 100 / len(urls))
		}
	}

	return buf.Streamer(0, buf.Len()), format, nil
}
// SaveOggToFile cuts the leading demo text and saves the audio as a wav file at path.
func SaveOggToFile(u, path string) error {
resp, err := http.Get(u)
if err != nil {
@@ -37,7 +68,7 @@ func SaveOggToFile(u, path string) error {
}
s, format, err := vorbis.Decode(resp.Body)
if err != nil {
resp.Body.Close()
_ = resp.Body.Close()
return err
}
defer s.Close()
@@ -50,6 +81,7 @@ func SaveOggToFile(u, path string) error {
return wav.Encode(f, s, format)
}
// SaveOggToWriteSeeker cuts the leading demo text and writes the wav stream to f.
func SaveOggToWriteSeeker(u string, f io.WriteSeeker) error {
resp, err := http.Get(u)
if err != nil {
@@ -57,7 +89,7 @@ func SaveOggToWriteSeeker(u string, f io.WriteSeeker) error {
}
s, format, err := vorbis.Decode(resp.Body)
if err != nil {
resp.Body.Close()
_ = resp.Body.Close()
return err
}
defer s.Close()
@@ -65,6 +97,7 @@ func SaveOggToWriteSeeker(u string, f io.WriteSeeker) error {
return wav.Encode(f, s, format)
}
// PlayOgg cuts the leading demo text and plays the audio directly.
func PlayOgg(u string) error {
resp, err := http.Get(u)
if err != nil {
@@ -72,7 +105,7 @@ func PlayOgg(u string) error {
}
s, format, err := vorbis.Decode(resp.Body)
if err != nil {
resp.Body.Close()
_ = resp.Body.Close()
return err
}
defer s.Close()