Compare commits
7 Commits
136eca180b
...
def057d8e9
| Author | SHA1 | Date | |
|---|---|---|---|
| def057d8e9 | |||
| a8c1dbc9c9 | |||
| 2134ca58f4 | |||
| 67c724ddbd | |||
| fc9331d411 | |||
| 1abad2258d | |||
| 59eee6131f |
1
.gitignore
vendored
1
.gitignore
vendored
@ -130,3 +130,4 @@ replay_pid*
|
||||
# Kotlin Gradle plugin data, see https://kotlinlang.org/docs/whatsnew20.html#new-directory-for-kotlin-data-in-gradle-projects
|
||||
.kotlin/
|
||||
gcp/
|
||||
library
|
||||
@ -20,31 +20,28 @@ repositories {
|
||||
mavenCentral()
|
||||
}
|
||||
|
||||
extra["springCloudGcpVersion"] = "7.1.0"
|
||||
extra["springCloudVersion"] = "2025.0.0"
|
||||
|
||||
dependencies {
|
||||
implementation("org.springframework.boot:spring-boot-starter-data-jpa")
|
||||
implementation("org.springframework.boot:spring-boot-starter-mail")
|
||||
implementation("org.springframework.boot:spring-boot-starter-validation")
|
||||
implementation("org.springframework.boot:spring-boot-starter-web")
|
||||
|
||||
implementation("com.fasterxml.jackson.module:jackson-module-kotlin")
|
||||
implementation("com.google.cloud:spring-cloud-gcp-starter")
|
||||
implementation("io.github.oshai:kotlin-logging-jvm:7.0.3")
|
||||
implementation("org.jetbrains.kotlin:kotlin-reflect")
|
||||
|
||||
implementation("com.google.cloud:google-cloud-texttospeech:2.68.0")
|
||||
implementation("org.apache.poi:poi:5.4.1")
|
||||
implementation("net.bramp.ffmpeg:ffmpeg:0.8.0")
|
||||
|
||||
runtimeOnly("com.mysql:mysql-connector-j")
|
||||
runtimeOnly("com.h2database:h2")
|
||||
|
||||
testRuntimeOnly("org.junit.platform:junit-platform-launcher")
|
||||
testImplementation("org.springframework.boot:spring-boot-starter-test")
|
||||
testImplementation("org.jetbrains.kotlin:kotlin-test-junit5")
|
||||
runtimeOnly("com.h2database:h2")
|
||||
testRuntimeOnly("org.junit.platform:junit-platform-launcher")
|
||||
}
|
||||
|
||||
dependencyManagement {
|
||||
imports {
|
||||
mavenBom("com.google.cloud:spring-cloud-gcp-dependencies:${property("springCloudGcpVersion")}")
|
||||
mavenBom("org.springframework.cloud:spring-cloud-dependencies:${property("springCloudVersion")}")
|
||||
}
|
||||
testImplementation("io.kotest:kotest-runner-junit5:5.9.1")
|
||||
testImplementation("io.kotest.extensions:kotest-extensions-spring:1.3.0")
|
||||
}
|
||||
|
||||
kotlin {
|
||||
|
||||
@ -0,0 +1,8 @@
|
||||
package com.sangdol.text_to_speech
|
||||
|
||||
/**
 * Describes one line of text to be synthesized into speech.
 *
 * @property text the plain text to be spoken.
 * @property type the voice used for synthesis; its `identifier` is the voice name sent to the TTS API.
 * @property speakingRate the speaking rate forwarded to the TTS audio configuration.
 * @property order position of this line within its script; the sample runner uses it as the
 * leading number of the generated file name.
 */
data class ConvertSingleTextRequest(
    val text: String,
    val type: VoiceType,
    val speakingRate: Double,
    val order: Int,
)
|
||||
94
src/main/kotlin/com/sangdol/text_to_speech/FfmpegUtils.kt
Normal file
94
src/main/kotlin/com/sangdol/text_to_speech/FfmpegUtils.kt
Normal file
@ -0,0 +1,94 @@
|
||||
package com.sangdol.text_to_speech
|
||||
|
||||
import net.bramp.ffmpeg.FFmpeg
|
||||
import net.bramp.ffmpeg.FFmpegExecutor
|
||||
import net.bramp.ffmpeg.builder.FFmpegBuilder
|
||||
import org.springframework.stereotype.Component
|
||||
import java.io.File
|
||||
|
||||
/** Absolute path of the FFmpeg executable (a Homebrew-style install location). */
private const val FFMPEG_PATH = "/opt/homebrew/bin/ffmpeg"

/** Directory, relative to the working directory, that holds the silence clips. */
private const val SILENCE_AUDIO_PATH = "library/silences"

/**
 * Builds the path of the silence clip that lasts [durationMs] milliseconds.
 *
 * The duration is part of the file name, so each distinct duration maps to its own file.
 */
private fun convertSilenceAudioFilePath(durationMs: Long): String {
    return "$SILENCE_AUDIO_PATH/silence-${durationMs}ms.mp3"
}
|
||||
|
||||
/**
 * Helper around the local FFmpeg binary: generates silence clips and concatenates
 * audio files, optionally separated by a fixed pause.
 *
 * @param ffmpegExecutor executor that runs the merge job; defaults to one bound to [FFMPEG_PATH].
 */
@Component
class FfmpegUtils(
    private val ffmpegExecutor: FFmpegExecutor = FFmpegExecutor(FFmpeg(FFMPEG_PATH))
) {

    /**
     * Generates an MP3 file containing [durationMs] milliseconds of silence at the path
     * returned by [convertSilenceAudioFilePath], overwriting any existing file (`-y`).
     *
     * FFmpeg's combined stdout/stderr output is echoed to standard output.
     *
     * @throws RuntimeException if FFmpeg exits with a non-zero exit code.
     */
    fun createSilenceAudio(durationMs: Long) {
        val durationSeconds = durationMs / 1000.0
        val outputFile = convertSilenceAudioFilePath(durationMs)

        // FFmpeg does not create missing parent directories for its output file.
        File(outputFile).parentFile?.mkdirs()

        val command = listOf(
            FFMPEG_PATH,
            "-y",
            "-f", "lavfi",
            "-i", "anullsrc=r=44100:cl=stereo",
            "-t", durationSeconds.toString(),
            "-acodec", "mp3",
            outputFile
        )

        val process = ProcessBuilder(command)
            .redirectErrorStream(true)
            .start()

        // Drain the merged output before waiting so the child cannot block on a full pipe.
        process.inputStream.bufferedReader().forEachLine { println(it) }

        // Only a non-zero exit code is a failure.
        val exitCode = process.waitFor()
        if (exitCode != 0) {
            throw RuntimeException("FFmpeg failed with exit code $exitCode")
        }
    }

    /**
     * Concatenates every file in [targetDirectory] into `saveDirectory/fileName` as MP3.
     *
     * Files are ordered by the first run of digits found in their names. When [intervalMs]
     * is greater than zero, a silence clip of that length is inserted between consecutive
     * files (not after the last one); the clip is generated first if it does not exist yet.
     *
     * @throws IllegalArgumentException if [targetDirectory] is missing, not a directory, or empty.
     * @throws IllegalStateException if a file name contains no digits, or the directory
     * can no longer be listed.
     * @throws RuntimeException if the silence clip has to be generated and FFmpeg fails.
     */
    fun mergeAudioWithInterval(intervalMs: Long, targetDirectory: String, saveDirectory: String, fileName: String) {
        val targetDir = File(targetDirectory).also { validateIsValidFileDirectory(it) }
        val fileNameRegex = """(\d+)(.*)""".toRegex()
        val silencePath = convertSilenceAudioFilePath(intervalMs)

        if (intervalMs > 0 && !File(silencePath).exists()) {
            createSilenceAudio(intervalMs)
        }

        // listFiles() may return null (I/O error, or the directory vanished after validation).
        val listedFiles = checkNotNull(targetDir.listFiles()) { "Unable to list files in: $targetDir" }

        val files = listedFiles
            .sortedBy { file ->
                val match = fileNameRegex.find(file.name)
                    ?: throw IllegalStateException("Invalid filename-convention")
                match.groupValues[1].toInt()
            }
            .map { it.toString() }

        // Interleave the silence clip between consecutive inputs.
        val inputs = buildList {
            files.forEachIndexed { idx, file ->
                add(file)
                if (intervalMs > 0L && idx != files.lastIndex) {
                    add(silencePath)
                }
            }
        }

        val builder = FFmpegBuilder()
        inputs.forEach { filePath -> builder.addInput(filePath) }

        // One "[i:a]" label per input, fed into a single audio-only concat filter.
        val filterInputs = inputs.indices.joinToString("") { idx -> "[$idx:a]" }
        val filterComplex = "$filterInputs concat=n=${inputs.size}:v=0:a=1[out]"

        builder.setComplexFilter(filterComplex)
            .addOutput("$saveDirectory/$fileName")
            .setAudioCodec("mp3")
            .addExtraArgs("-map", "[out]")
            .done()

        ffmpegExecutor.createJob(builder).run()
    }

    /**
     * Ensures [dir] exists, is a directory and contains at least one entry.
     *
     * @throws IllegalArgumentException if any of these conditions does not hold.
     */
    private fun validateIsValidFileDirectory(dir: File) {
        require(dir.exists()) { "Directory not found: $dir" }
        require(dir.isDirectory) { "Path is not a directory: $dir" }
        require(!dir.listFiles().isNullOrEmpty()) { "Directory is null or empty: $dir" }
    }
}
|
||||
@ -0,0 +1,44 @@
|
||||
package com.sangdol.text_to_speech
|
||||
|
||||
import com.google.cloud.texttospeech.v1.TextToSpeechClient
|
||||
import org.springframework.context.annotation.Bean
|
||||
import org.springframework.context.annotation.Configuration
|
||||
|
||||
/** Spring configuration that exposes the Google Cloud Text-to-Speech client as a bean. */
@Configuration
class GoogleTtsConfig {

    /** Creates the [TextToSpeechClient] through its no-argument factory method. */
    @Bean
    fun textToSpeechClient(): TextToSpeechClient {
        return TextToSpeechClient.create()
    }
}
|
||||
|
||||
/**
 * Language codes that can be requested for synthesis.
 *
 * @property code the language tag passed to the TTS voice selection.
 */
enum class TtsLanguageCode(val code: String) {
    ENGLISH_US("en-US"),
}
|
||||
|
||||
/**
 * A voice that can be selected for synthesis. The hierarchy is closed: every voice is
 * either a [Neural2MaleVoice] or a [Neural2FemaleVoice].
 */
sealed interface VoiceType {
    /** Voice name passed to the TTS voice selection, e.g. `en-US-Neural2-A`. */
    val identifier: String

    /** Name of the voice constant; the enum implementations supply their constant name. */
    val name: String
}

/** Male `en-US` Neural2 voices. */
enum class Neural2MaleVoice(override val identifier: String) : VoiceType {
    TYPE_A("en-US-Neural2-A"),
    TYPE_D("en-US-Neural2-D"),
    TYPE_I("en-US-Neural2-I"),
    TYPE_J("en-US-Neural2-J"),
}

/** Female `en-US` Neural2 voices. */
enum class Neural2FemaleVoice(override val identifier: String) : VoiceType {
    TYPE_C("en-US-Neural2-C"),
    TYPE_E("en-US-Neural2-E"),
    TYPE_F("en-US-Neural2-F"),
    TYPE_G("en-US-Neural2-G"),
    TYPE_H("en-US-Neural2-H"),
}
|
||||
@ -0,0 +1,91 @@
|
||||
package com.sangdol.text_to_speech
|
||||
|
||||
import io.github.oshai.kotlinlogging.KLogger
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import org.springframework.boot.CommandLineRunner
|
||||
import org.springframework.context.annotation.Profile
|
||||
import org.springframework.stereotype.Component
|
||||
|
||||
private val log: KLogger = KotlinLogging.logger {}
|
||||
|
||||
/**
 * Start-up runner that synthesizes two sample dialogue scripts into per-line MP3 files.
 *
 * Only active under the Spring profile `none`.
 */
@Profile("none")
@Component
class SampleCreateTestRunner(
    private val singleTextConverter: SingleTextConverter
) : CommandLineRunner {

    /**
     * Converts both sample scripts. Each script is written to its own directory so the
     * second script does not overwrite same-named files produced by the first.
     */
    override fun run(vararg args: String?) {
        convertScript(
            directory = "library/sample-scripts-1",
            requests = listOf(
                ConvertSingleTextRequest(
                    text = "Did you find the book you were looking for?",
                    type = Neural2FemaleVoice.TYPE_C,
                    speakingRate = 1.0,
                    order = 1
                ),
                ConvertSingleTextRequest(
                    text = "Yes, it’s right here. It’s exactly what I need for my project.",
                    type = Neural2MaleVoice.TYPE_A,
                    speakingRate = 1.0,
                    order = 2
                ),
                ConvertSingleTextRequest(
                    text = "Great. Do you need help with anything else?",
                    type = Neural2FemaleVoice.TYPE_E,
                    speakingRate = 1.0,
                    order = 3
                ),
                ConvertSingleTextRequest(
                    text = "Could you show me how to use the copy machine?",
                    type = Neural2MaleVoice.TYPE_D,
                    speakingRate = 1.0,
                    order = 4
                ),
                ConvertSingleTextRequest(
                    text = "Sure, it’s over there in the corner. Let me guide you.",
                    type = Neural2FemaleVoice.TYPE_F,
                    speakingRate = 1.0,
                    order = 5
                ),
            )
        )

        convertScript(
            directory = "library/sample-scripts-2",
            requests = listOf(
                ConvertSingleTextRequest(
                    text = "I want to pick up a new hobby, but I’m not sure what to try.",
                    type = Neural2FemaleVoice.TYPE_C,
                    speakingRate = 1.0,
                    order = 1
                ),
                ConvertSingleTextRequest(
                    text = "Have you thought about gardening? ",
                    type = Neural2MaleVoice.TYPE_A,
                    speakingRate = 1.0,
                    order = 2
                ),
                ConvertSingleTextRequest(
                    text = "Gardening? I’ve never tried it before. What can I grow?",
                    type = Neural2FemaleVoice.TYPE_E,
                    speakingRate = 1.0,
                    order = 3
                ),
                ConvertSingleTextRequest(
                    text = "You can start with some flowers for your balcony.",
                    type = Neural2MaleVoice.TYPE_D,
                    speakingRate = 1.0,
                    order = 4
                ),
            )
        )
    }

    /**
     * Synthesizes every request into `directory/<order>(<gender>-<voice name>).mp3`.
     */
    private fun convertScript(directory: String, requests: List<ConvertSingleTextRequest>) {
        requests.forEach { request ->
            val type: VoiceType = request.type
            val filePath = "$directory/${request.order}(${genderString(type)}-${type.name}).mp3"
            singleTextConverter.convert(request, filePath)
        }
    }

    /** Maps a voice to the gender label used in generated file names. */
    private fun genderString(type: VoiceType) = when (type) {
        is Neural2MaleVoice -> "male"
        is Neural2FemaleVoice -> "female"
    }
}
|
||||
@ -0,0 +1,44 @@
|
||||
package com.sangdol.text_to_speech
|
||||
|
||||
import com.google.cloud.texttospeech.v1.*
|
||||
import com.google.protobuf.ByteString
|
||||
import org.springframework.stereotype.Component
|
||||
import java.io.File
|
||||
import java.io.FileOutputStream
|
||||
|
||||
/**
 * Synthesizes a single piece of text through the Text-to-Speech client and writes the
 * resulting audio to a file.
 */
@Component
class SingleTextConverter(
    private val ttsClient: TextToSpeechClient
) {
    /**
     * Synthesizes `request.text` and writes the audio bytes to [filePath], creating any
     * missing parent directories and replacing an existing file.
     *
     * @param request text, voice and speaking rate to synthesize.
     * @param filePath destination file; may be a bare file name without a directory part.
     * @param language language code sent with the voice selection.
     * @param encodingType audio encoding requested from the service.
     */
    fun convert(
        request: ConvertSingleTextRequest,
        filePath: String,
        language: TtsLanguageCode = TtsLanguageCode.ENGLISH_US,
        encodingType: AudioEncoding = AudioEncoding.MP3
    ) {
        val synthesisInput = SynthesisInput.newBuilder()
            .setText(request.text)
            .build()

        val voiceOptions = VoiceSelectionParams.newBuilder()
            .setLanguageCode(language.code)
            .setName(request.type.identifier)
            .build()

        val audioConfig = AudioConfig.newBuilder()
            .setAudioEncoding(encodingType)
            .setSpeakingRate(request.speakingRate)
            .build()

        val synthesizeResponse: SynthesizeSpeechResponse =
            ttsClient.synthesizeSpeech(synthesisInput, voiceOptions, audioConfig)
        val content: ByteString = synthesizeResponse.audioContent

        val file = File(filePath)
        // parentFile is null when filePath has no directory component; mkdirs() is a
        // no-op when the directory already exists.
        file.parentFile?.mkdirs()

        FileOutputStream(file).use { it.write(content.toByteArray()) }
    }
}
|
||||
@ -4,8 +4,8 @@ import org.springframework.boot.autoconfigure.SpringBootApplication
|
||||
import org.springframework.boot.runApplication
|
||||
|
||||
@SpringBootApplication
|
||||
class TextToSpeechApplication
|
||||
class TTSApplication
|
||||
|
||||
fun main(args: Array<String>) {
|
||||
runApplication<TextToSpeechApplication>(*args)
|
||||
runApplication<TTSApplication>(*args)
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user