Compare commits
7 Commits
136eca180b
...
def057d8e9
| Author | SHA1 | Date | |
|---|---|---|---|
| def057d8e9 | |||
| a8c1dbc9c9 | |||
| 2134ca58f4 | |||
| 67c724ddbd | |||
| fc9331d411 | |||
| 1abad2258d | |||
| 59eee6131f |
1
.gitignore
vendored
1
.gitignore
vendored
@ -130,3 +130,4 @@ replay_pid*
|
|||||||
# Kotlin Gradle plugin data, see https://kotlinlang.org/docs/whatsnew20.html#new-directory-for-kotlin-data-in-gradle-projects
|
# Kotlin Gradle plugin data, see https://kotlinlang.org/docs/whatsnew20.html#new-directory-for-kotlin-data-in-gradle-projects
|
||||||
.kotlin/
|
.kotlin/
|
||||||
gcp/
|
gcp/
|
||||||
|
library
|
||||||
@ -20,31 +20,28 @@ repositories {
|
|||||||
mavenCentral()
|
mavenCentral()
|
||||||
}
|
}
|
||||||
|
|
||||||
extra["springCloudGcpVersion"] = "7.1.0"
|
|
||||||
extra["springCloudVersion"] = "2025.0.0"
|
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
implementation("org.springframework.boot:spring-boot-starter-data-jpa")
|
implementation("org.springframework.boot:spring-boot-starter-data-jpa")
|
||||||
implementation("org.springframework.boot:spring-boot-starter-mail")
|
implementation("org.springframework.boot:spring-boot-starter-mail")
|
||||||
implementation("org.springframework.boot:spring-boot-starter-validation")
|
implementation("org.springframework.boot:spring-boot-starter-validation")
|
||||||
implementation("org.springframework.boot:spring-boot-starter-web")
|
implementation("org.springframework.boot:spring-boot-starter-web")
|
||||||
|
|
||||||
implementation("com.fasterxml.jackson.module:jackson-module-kotlin")
|
implementation("com.fasterxml.jackson.module:jackson-module-kotlin")
|
||||||
implementation("com.google.cloud:spring-cloud-gcp-starter")
|
implementation("io.github.oshai:kotlin-logging-jvm:7.0.3")
|
||||||
implementation("org.jetbrains.kotlin:kotlin-reflect")
|
implementation("org.jetbrains.kotlin:kotlin-reflect")
|
||||||
|
|
||||||
implementation("com.google.cloud:google-cloud-texttospeech:2.68.0")
|
implementation("com.google.cloud:google-cloud-texttospeech:2.68.0")
|
||||||
implementation("org.apache.poi:poi:5.4.1")
|
implementation("org.apache.poi:poi:5.4.1")
|
||||||
|
implementation("net.bramp.ffmpeg:ffmpeg:0.8.0")
|
||||||
|
|
||||||
runtimeOnly("com.mysql:mysql-connector-j")
|
runtimeOnly("com.mysql:mysql-connector-j")
|
||||||
|
runtimeOnly("com.h2database:h2")
|
||||||
|
|
||||||
|
testRuntimeOnly("org.junit.platform:junit-platform-launcher")
|
||||||
testImplementation("org.springframework.boot:spring-boot-starter-test")
|
testImplementation("org.springframework.boot:spring-boot-starter-test")
|
||||||
testImplementation("org.jetbrains.kotlin:kotlin-test-junit5")
|
testImplementation("org.jetbrains.kotlin:kotlin-test-junit5")
|
||||||
runtimeOnly("com.h2database:h2")
|
testImplementation("io.kotest:kotest-runner-junit5:5.9.1")
|
||||||
testRuntimeOnly("org.junit.platform:junit-platform-launcher")
|
testImplementation("io.kotest.extensions:kotest-extensions-spring:1.3.0")
|
||||||
}
|
|
||||||
|
|
||||||
dependencyManagement {
|
|
||||||
imports {
|
|
||||||
mavenBom("com.google.cloud:spring-cloud-gcp-dependencies:${property("springCloudGcpVersion")}")
|
|
||||||
mavenBom("org.springframework.cloud:spring-cloud-dependencies:${property("springCloudVersion")}")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
kotlin {
|
kotlin {
|
||||||
|
|||||||
@ -0,0 +1,8 @@
|
|||||||
|
package com.sangdol.text_to_speech
|
||||||
|
|
||||||
|
/**
 * One piece of text to be synthesized into a single audio clip.
 *
 * @property text the text to be spoken.
 * @property type the voice to synthesize with.
 * @property speakingRate the speaking-rate value handed to the synthesis audio configuration.
 * @property order position of this clip within its script.
 */
data class ConvertSingleTextRequest(
    val text: String,
    val type: VoiceType,
    val speakingRate: Double,
    val order: Int,
)
|
||||||
94
src/main/kotlin/com/sangdol/text_to_speech/FfmpegUtils.kt
Normal file
94
src/main/kotlin/com/sangdol/text_to_speech/FfmpegUtils.kt
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
package com.sangdol.text_to_speech
|
||||||
|
|
||||||
|
import net.bramp.ffmpeg.FFmpeg
|
||||||
|
import net.bramp.ffmpeg.FFmpegExecutor
|
||||||
|
import net.bramp.ffmpeg.builder.FFmpegBuilder
|
||||||
|
import org.springframework.stereotype.Component
|
||||||
|
import java.io.File
|
||||||
|
|
||||||
|
/** Location of the ffmpeg executable. */
private const val FFMPEG_PATH: String = "/opt/homebrew/bin/ffmpeg"

/** Directory in which silence clips are stored. */
private const val SILENCE_AUDIO_PATH: String = "library/silences"

/**
 * Builds the path of the silence clip that lasts [durationMs] milliseconds.
 *
 * The path is only computed here; nothing is read from or written to disk.
 */
private fun convertSilenceAudioFilePath(durationMs: Long): String {
    val clipName = "silence-${durationMs}ms.mp3"
    return "$SILENCE_AUDIO_PATH/$clipName"
}
|
||||||
|
|
||||||
|
/**
 * Audio helpers built on the ffmpeg executable: generating silence clips and
 * concatenating the clips of a directory into one file.
 *
 * @property ffmpegExecutor executor used to run the merge job; defaults to one
 * bound to the executable at `FFMPEG_PATH`.
 */
@Component
class FfmpegUtils(
    private val ffmpegExecutor: FFmpegExecutor = FFmpegExecutor(FFmpeg(FFMPEG_PATH))
) {

    // Compiled once instead of on every merge call.
    private val fileNameRegex = """(\d+)(.*)""".toRegex()

    /**
     * Generates a silent MP3 clip of [durationMs] milliseconds at the path given by
     * `convertSilenceAudioFilePath`, overwriting an existing clip (`-y`).
     *
     * @param durationMs clip length in milliseconds; must be positive.
     * @throws IllegalArgumentException if [durationMs] is not positive.
     * @throws RuntimeException if ffmpeg exits with a non-zero code.
     */
    fun createSilenceAudio(durationMs: Long) {
        require(durationMs > 0) { "Silence duration must be positive: $durationMs" }

        val outputFile = convertSilenceAudioFilePath(durationMs)
        // Make sure the target directory exists before ffmpeg tries to write into it.
        File(outputFile).parentFile?.mkdirs()

        val command = listOf(
            FFMPEG_PATH,
            "-y",
            "-f", "lavfi",
            "-i", "anullsrc=r=44100:cl=stereo",
            "-t", (durationMs / 1000.0).toString(),
            "-acodec", "mp3",
            outputFile,
        )

        val process = ProcessBuilder(command)
            .redirectErrorStream(true)
            .start()

        // Drain the merged stdout/stderr so the child cannot block on a full pipe.
        process.inputStream.bufferedReader().forEachLine { println(it) }

        // Fail only on a non-zero exit code (the previous takeUnless check was inverted).
        val exitCode = process.waitFor()
        if (exitCode != 0) {
            throw RuntimeException("FFmpeg failed with exit code $exitCode")
        }
    }

    /**
     * Concatenates every file in [targetDirectory], ordered by the first number in
     * its file name, into `saveDirectory/fileName`, inserting a silence clip of
     * [intervalMs] milliseconds between consecutive files.
     *
     * @param intervalMs pause between clips in milliseconds; no pause is inserted when it is not positive.
     * @param targetDirectory directory holding the clips to merge.
     * @param saveDirectory directory the merged file is written to; created when missing.
     * @param fileName name of the merged file.
     * @throws IllegalArgumentException if [targetDirectory] is missing, not a directory, or empty.
     * @throws IllegalStateException if a file name contains no number to order by.
     */
    fun mergeAudioWithInterval(intervalMs: Long, targetDirectory: String, saveDirectory: String, fileName: String) {
        val targetDir = File(targetDirectory).also { validateIsValidFileDirectory(it) }
        val silencePath = convertSilenceAudioFilePath(intervalMs)

        if (intervalMs > 0 && !File(silencePath).exists()) {
            createSilenceAudio(intervalMs)
        }

        val clips = listClipsInOrder(targetDir)
        val inputs = buildList {
            clips.forEachIndexed { idx, clip ->
                add(clip)
                // Silence goes between clips only, never after the last one.
                if (intervalMs > 0 && idx != clips.lastIndex) {
                    add(silencePath)
                }
            }
        }

        // ffmpeg does not create missing output directories.
        File(saveDirectory).mkdirs()

        val builder = FFmpegBuilder()
        inputs.forEach { filePath -> builder.addInput(filePath) }

        // One "[i:a]" label per input, all fed into a single audio-only concat.
        val filterInputs = inputs.indices.joinToString("") { idx -> "[$idx:a]" }
        val filterComplex = "$filterInputs concat=n=${inputs.size}:v=0:a=1[out]"

        builder.setComplexFilter(filterComplex)
            .addOutput("$saveDirectory/$fileName")
            .setAudioCodec("mp3")
            .addExtraArgs("-map", "[out]")
            .done()

        ffmpegExecutor.createJob(builder).run()
    }

    /** Returns the paths of the entries of [directory], sorted by the first number found in each name. */
    private fun listClipsInOrder(directory: File): List<String> =
        directory.listFiles().orEmpty()
            .sortedBy { file ->
                val matchResult = fileNameRegex.find(file.name)
                    ?: throw IllegalStateException("Invalid filename-convention")
                matchResult.groupValues[1].toInt()
            }
            .map { "$it" }

    /** Ensures [dir] exists, is a directory, and has at least one entry. */
    private fun validateIsValidFileDirectory(dir: File) {
        require(dir.exists()) { "Directory not found: $dir" }
        require(dir.isDirectory) { "Path is not a directory: $dir" }
        require(!dir.listFiles().isNullOrEmpty()) { "Directory is null or empty: $dir" }
    }
}
|
||||||
@ -0,0 +1,44 @@
|
|||||||
|
package com.sangdol.text_to_speech
|
||||||
|
|
||||||
|
import com.google.cloud.texttospeech.v1.TextToSpeechClient
|
||||||
|
import org.springframework.context.annotation.Bean
|
||||||
|
import org.springframework.context.annotation.Configuration
|
||||||
|
|
||||||
|
/**
 * Spring configuration that registers the Google Cloud Text-to-Speech client as a bean.
 */
@Configuration
class GoogleTtsConfig {

    /** Provides the [TextToSpeechClient] bean, obtained from the client's `create()` factory. */
    @Bean
    fun textToSpeechClient(): TextToSpeechClient {
        return TextToSpeechClient.create()
    }
}
|
||||||
|
|
||||||
|
/**
 * Languages that can be requested for synthesis.
 *
 * @property code the language-code string passed to the voice selection parameters.
 */
enum class TtsLanguageCode(val code: String) {
    ENGLISH_US("en-US")
}
|
||||||
|
|
||||||
|
/**
 * A voice that can be selected for synthesis.
 *
 * The interface is sealed, so a `when` over a [VoiceType] can be exhaustive
 * without an `else` branch.
 */
sealed interface VoiceType {

    /** Voice name handed to the synthesis voice selection. */
    val identifier: String

    /** Short name of the voice; enum implementations satisfy it with their constant name. */
    val name: String
}
|
||||||
|
|
||||||
|
/**
 * The male `en-US` Neural2 voices.
 *
 * @property identifier voice name of the constant, e.g. `en-US-Neural2-A`.
 */
enum class Neural2MaleVoice(override val identifier: String) : VoiceType {
    TYPE_A("en-US-Neural2-A"),
    TYPE_D("en-US-Neural2-D"),
    TYPE_I("en-US-Neural2-I"),
    TYPE_J("en-US-Neural2-J")
}
|
||||||
|
|
||||||
|
/**
 * The female `en-US` Neural2 voices.
 *
 * @property identifier voice name of the constant, e.g. `en-US-Neural2-C`.
 */
enum class Neural2FemaleVoice(override val identifier: String) : VoiceType {
    TYPE_C("en-US-Neural2-C"),
    TYPE_E("en-US-Neural2-E"),
    TYPE_F("en-US-Neural2-F"),
    TYPE_G("en-US-Neural2-G"),
    TYPE_H("en-US-Neural2-H")
}
|
||||||
@ -0,0 +1,91 @@
|
|||||||
|
package com.sangdol.text_to_speech
|
||||||
|
|
||||||
|
import io.github.oshai.kotlinlogging.KLogger
|
||||||
|
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||||
|
import org.springframework.boot.CommandLineRunner
|
||||||
|
import org.springframework.context.annotation.Profile
|
||||||
|
import org.springframework.stereotype.Component
|
||||||
|
|
||||||
|
private val log: KLogger = KotlinLogging.logger {}
|
||||||
|
|
||||||
|
/**
 * Start-up runner that synthesizes two sample dialogues, one audio clip per line.
 *
 * It is registered only when the `none` profile is active.
 */
@Profile("none")
@Component
class SampleCreateTestRunner(
    private val singleTextConverter: SingleTextConverter
) : CommandLineRunner {

    /** Converts both sample dialogues; [args] are ignored. */
    override fun run(vararg args: String?) {
        convertScript(
            directory = "library/sample-scripts-1",
            lines = listOf(
                "Did you find the book you were looking for?" to Neural2FemaleVoice.TYPE_C,
                "Yes, it’s right here. It’s exactly what I need for my project." to Neural2MaleVoice.TYPE_A,
                "Great. Do you need help with anything else?" to Neural2FemaleVoice.TYPE_E,
                "Could you show me how to use the copy machine?" to Neural2MaleVoice.TYPE_D,
                "Sure, it’s over there in the corner. Let me guide you." to Neural2FemaleVoice.TYPE_F,
            ),
        )

        // The second dialogue gets its own directory. Writing it into
        // "sample-scripts-1" as before produced the same file names for clips 1-4
        // and overwrote the first dialogue's audio.
        // NOTE(review): the directory name "sample-scripts-2" is an assumption - confirm.
        convertScript(
            directory = "library/sample-scripts-2",
            lines = listOf(
                "I want to pick up a new hobby, but I’m not sure what to try." to Neural2FemaleVoice.TYPE_C,
                "Have you thought about gardening? " to Neural2MaleVoice.TYPE_A,
                "Gardening? I’ve never tried it before. What can I grow?" to Neural2FemaleVoice.TYPE_E,
                "You can start with some flowers for your balcony." to Neural2MaleVoice.TYPE_D,
            ),
        )
    }

    /**
     * Synthesizes each line of a dialogue into `directory/<order>(<gender>-<voice name>).mp3`,
     * where the order is the 1-based position of the line and the speaking rate is 1.0.
     */
    private fun convertScript(directory: String, lines: List<Pair<String, VoiceType>>) {
        lines.forEachIndexed { index, (text, voice) ->
            val request = ConvertSingleTextRequest(
                text = text,
                type = voice,
                speakingRate = 1.0,
                order = index + 1,
            )
            val filePath = "$directory/${request.order}(${genderString(voice)}-${voice.name}).mp3"
            singleTextConverter.convert(request, filePath)
        }
    }

    /** Maps a voice to the gender label used in file names; exhaustive over the sealed [VoiceType]. */
    private fun genderString(type: VoiceType) = when (type) {
        is Neural2MaleVoice -> "male"
        is Neural2FemaleVoice -> "female"
    }
}
|
||||||
@ -0,0 +1,44 @@
|
|||||||
|
package com.sangdol.text_to_speech
|
||||||
|
|
||||||
|
import com.google.cloud.texttospeech.v1.*
|
||||||
|
import com.google.protobuf.ByteString
|
||||||
|
import org.springframework.stereotype.Component
|
||||||
|
import java.io.File
|
||||||
|
import java.io.FileOutputStream
|
||||||
|
|
||||||
|
/**
 * Synthesizes a single piece of text with the Text-to-Speech client and stores
 * the resulting audio in a file.
 */
@Component
class SingleTextConverter(
    private val ttsClient: TextToSpeechClient
) {

    /**
     * Synthesizes `request.text` with the voice `request.type` and writes the audio to [filePath].
     *
     * Missing parent directories of [filePath] are created; an existing file is overwritten.
     *
     * @param request text, voice and speaking rate to synthesize with.
     * @param filePath destination of the audio file.
     * @param language language code sent with the voice selection.
     * @param encodingType audio encoding requested from the service.
     */
    fun convert(
        request: ConvertSingleTextRequest,
        filePath: String,
        language: TtsLanguageCode = TtsLanguageCode.ENGLISH_US,
        encodingType: AudioEncoding = AudioEncoding.MP3
    ) {
        val synthesisInput = SynthesisInput.newBuilder()
            .setText(request.text)
            .build()

        val voiceOptions = VoiceSelectionParams.newBuilder()
            .setLanguageCode(language.code)
            .setName(request.type.identifier)
            .build()

        val audioConfig = AudioConfig.newBuilder()
            .setAudioEncoding(encodingType)
            .setSpeakingRate(request.speakingRate)
            .build()

        val synthesizeResponse: SynthesizeSpeechResponse =
            ttsClient.synthesizeSpeech(synthesisInput, voiceOptions, audioConfig)
        val content: ByteString = synthesizeResponse.audioContent

        val file = File(filePath)
        // parentFile is null for a bare file name such as "out.mp3"; the safe call
        // avoids the NullPointerException the unguarded dereference caused.
        file.parentFile?.mkdirs()

        FileOutputStream(file).use { it.write(content.toByteArray()) }
    }
}
|
||||||
@ -4,8 +4,8 @@ import org.springframework.boot.autoconfigure.SpringBootApplication
|
|||||||
import org.springframework.boot.runApplication
|
import org.springframework.boot.runApplication
|
||||||
|
|
||||||
@SpringBootApplication
|
@SpringBootApplication
|
||||||
class TextToSpeechApplication
|
class TTSApplication
|
||||||
|
|
||||||
fun main(args: Array<String>) {
|
fun main(args: Array<String>) {
|
||||||
runApplication<TextToSpeechApplication>(*args)
|
runApplication<TTSApplication>(*args)
|
||||||
}
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user