Compare commits

..

7 Commits

8 changed files with 295 additions and 16 deletions

1
.gitignore vendored
View File

@ -130,3 +130,4 @@ replay_pid*
# Kotlin Gradle plugin data, see https://kotlinlang.org/docs/whatsnew20.html#new-directory-for-kotlin-data-in-gradle-projects
.kotlin/
gcp/
library

View File

@ -20,31 +20,28 @@ repositories {
mavenCentral()
}
// BOM versions; dependencyManagement reads them back through property(...).
extra["springCloudGcpVersion"] = "7.1.0"
extra["springCloudVersion"] = "2025.0.0"
// Module dependencies, declared per Gradle configuration.
dependencies {
implementation("org.springframework.boot:spring-boot-starter-data-jpa")
implementation("org.springframework.boot:spring-boot-starter-mail")
implementation("org.springframework.boot:spring-boot-starter-validation")
implementation("org.springframework.boot:spring-boot-starter-web")
implementation("com.fasterxml.jackson.module:jackson-module-kotlin")
implementation("com.google.cloud:spring-cloud-gcp-starter")
implementation("io.github.oshai:kotlin-logging-jvm:7.0.3")
implementation("org.jetbrains.kotlin:kotlin-reflect")
// Provides com.google.cloud.texttospeech.v1.* (TextToSpeechClient and request builders).
implementation("com.google.cloud:google-cloud-texttospeech:2.68.0")
implementation("org.apache.poi:poi:5.4.1")
// Provides net.bramp.ffmpeg.* (FFmpeg, FFmpegExecutor, FFmpegBuilder).
implementation("net.bramp.ffmpeg:ffmpeg:0.8.0")
runtimeOnly("com.mysql:mysql-connector-j")
runtimeOnly("com.h2database:h2")
testRuntimeOnly("org.junit.platform:junit-platform-launcher")
testImplementation("org.springframework.boot:spring-boot-starter-test")
testImplementation("org.jetbrains.kotlin:kotlin-test-junit5")
// NOTE(review): the two lines below repeat the h2 and junit-platform-launcher declarations
// above — presumably removed/added lines of a diff shown together; confirm and keep one copy.
runtimeOnly("com.h2database:h2")
testRuntimeOnly("org.junit.platform:junit-platform-launcher")
}
// Imports the Spring Cloud GCP and Spring Cloud BOMs at the versions stored in `extra`.
dependencyManagement {
imports {
mavenBom("com.google.cloud:spring-cloud-gcp-dependencies:${property("springCloudGcpVersion")}")
mavenBom("org.springframework.cloud:spring-cloud-dependencies:${property("springCloudVersion")}")
}
// NOTE(review): these kotest testImplementation lines appear inside dependencyManagement
// rather than dependencies — looks like merged diff lines; confirm where they belong.
testImplementation("io.kotest:kotest-runner-junit5:5.9.1")
testImplementation("io.kotest.extensions:kotest-extensions-spring:1.3.0")
}
kotlin {

View File

@ -0,0 +1,8 @@
package com.sangdol.text_to_speech
/**
 * One piece of text to synthesize together with the voice settings to apply.
 *
 * @property text the text passed to the synthesizer as input.
 * @property type the voice; its `identifier` is sent as the voice name.
 * @property speakingRate value forwarded to the audio config's speaking rate.
 * @property order position of this text within a script; the sample runner
 *   uses it as the leading number of the output file name.
 */
data class ConvertSingleTextRequest(
    val text: String,
    val type: VoiceType,
    val speakingRate: Double,
    val order: Int,
)

View File

@ -0,0 +1,94 @@
package com.sangdol.text_to_speech
import net.bramp.ffmpeg.FFmpeg
import net.bramp.ffmpeg.FFmpegExecutor
import net.bramp.ffmpeg.builder.FFmpegBuilder
import org.springframework.stereotype.Component
import java.io.File
/** Path of the ffmpeg binary that gets executed. */
private const val FFMPEG_PATH = "/opt/homebrew/bin/ffmpeg"

/** Directory in which the silence clips are stored. */
private const val SILENCE_AUDIO_PATH = "library/silences"

/**
 * Returns the path of the silence clip for [durationMs] milliseconds inside
 * [SILENCE_AUDIO_PATH].
 */
private fun convertSilenceAudioFilePath(durationMs: Long): String {
    return "$SILENCE_AUDIO_PATH/silence-${durationMs}ms.mp3"
}
/**
 * Spring component that drives ffmpeg: it renders silent MP3 clips and
 * concatenates the numbered audio files of a directory into one output file.
 *
 * @property ffmpegExecutor executor that runs the concat job; defaults to one
 *   created for the binary at [FFMPEG_PATH].
 */
@Component
class FfmpegUtils(
    private val ffmpegExecutor: FFmpegExecutor = FFmpegExecutor(FFmpeg(FFMPEG_PATH))
) {
    /** Group 1 captures the first run of digits in a file name; it is the sort key when merging. */
    private val fileNameRegex = """(\d+)(.*)""".toRegex()

    /**
     * Runs ffmpeg to write a silent stereo MP3 of [durationMs] milliseconds to
     * the path returned by [convertSilenceAudioFilePath].
     *
     * @param durationMs clip length in milliseconds.
     * @throws RuntimeException if ffmpeg exits with a non-zero exit code.
     */
    fun createSilenceAudio(durationMs: Long) {
        val durationSeconds = durationMs / 1000.0
        val outputFile = convertSilenceAudioFilePath(durationMs)
        // Make sure the target directory exists before ffmpeg writes into it.
        File(outputFile).parentFile?.mkdirs()

        val command = listOf(
            FFMPEG_PATH,
            "-y",
            "-f", "lavfi",
            "-i", "anullsrc=r=44100:cl=stereo",
            "-t", durationSeconds.toString(),
            "-acodec", "mp3",
            outputFile
        )
        val process = ProcessBuilder(command)
            .redirectErrorStream(true)
            .start()
        // stderr is merged into stdout above; echo everything ffmpeg prints.
        process.inputStream.bufferedReader().forEachLine { println(it) }

        // Exit code 0 means success; anything else is reported as a failure.
        val exitCode = process.waitFor()
        if (exitCode != 0) {
            throw RuntimeException("FFmpeg failed with exit code $exitCode")
        }
    }

    /**
     * Concatenates every file of [targetDirectory], ordered by the first number
     * in its file name, into `saveDirectory/fileName` (MP3). When [intervalMs]
     * is positive, a silence clip of that length is inserted between
     * consecutive files; the clip is created first if it does not exist yet.
     *
     * @param intervalMs pause between files in milliseconds; no pause when it is not positive.
     * @param targetDirectory directory holding the input files.
     * @param saveDirectory directory the merged file is written to; created if missing.
     * @param fileName name of the merged file inside [saveDirectory].
     * @throws IllegalArgumentException if [targetDirectory] is missing, not a directory, or empty.
     * @throws IllegalStateException if a file name contains no digits.
     */
    fun mergeAudioWithInterval(intervalMs: Long, targetDirectory: String, saveDirectory: String, fileName: String) {
        val targetDir = File(targetDirectory).also { validateIsValidFileDirectory(it) }

        val insertSilence = intervalMs > 0
        val silencePath = convertSilenceAudioFilePath(intervalMs)
        if (insertSilence && !File(silencePath).exists()) {
            createSilenceAudio(intervalMs)
        }

        val files = targetDir.listFiles().orEmpty()
            .sortedBy { file ->
                val matchResult = fileNameRegex.find(file.name)
                    ?: throw IllegalStateException("Invalid filename-convention")
                matchResult.groupValues[1].toInt()
            }
            .map { it.path }

        // Interleave the inputs with the silence clip, but never after the last file.
        val inputs = buildList<String> {
            files.forEachIndexed { idx, file ->
                add(file)
                if (insertSilence && idx != files.lastIndex) {
                    add(silencePath)
                }
            }
        }

        // Make sure the output directory exists before ffmpeg writes into it.
        File(saveDirectory).mkdirs()

        val builder = FFmpegBuilder()
        inputs.forEach { filePath -> builder.addInput(filePath) }
        // One "[i:a]" label per input, fed into a single audio-only concat filter.
        val filterInputs = inputs.indices.joinToString("") { idx -> "[$idx:a]" }
        val filterComplex = "$filterInputs concat=n=${inputs.size}:v=0:a=1[out]"
        builder.setComplexFilter(filterComplex)
            .addOutput("$saveDirectory/$fileName")
            .setAudioCodec("mp3")
            .addExtraArgs("-map", "[out]")
            .done()
        ffmpegExecutor.createJob(builder).run()
    }

    /**
     * Checks that [dir] exists, is a directory and contains at least one entry.
     *
     * @throws IllegalArgumentException if any of these checks fails.
     */
    private fun validateIsValidFileDirectory(dir: File) {
        require(dir.exists()) { "Directory not found: $dir" }
        require(dir.isDirectory) { "Path is not a directory: $dir" }
        require(!dir.listFiles().isNullOrEmpty()) { "Directory is null or empty: $dir" }
    }
}

View File

@ -0,0 +1,44 @@
package com.sangdol.text_to_speech
import com.google.cloud.texttospeech.v1.TextToSpeechClient
import org.springframework.context.annotation.Bean
import org.springframework.context.annotation.Configuration
/** Spring configuration that exposes the Text-to-Speech client as a bean. */
@Configuration
class GoogleTtsConfig {

    /** Builds the client through [TextToSpeechClient.create]. */
    @Bean
    fun textToSpeechClient(): TextToSpeechClient {
        return TextToSpeechClient.create()
    }
}
/**
 * Language codes that can be sent with a synthesis request.
 *
 * @property code the language code string passed to the voice selection.
 */
enum class TtsLanguageCode(val code: String) {
    ENGLISH_US(code = "en-US"),
}
/**
 * A selectable voice. The implementations in this file are enums, grouped by
 * gender.
 */
sealed interface VoiceType {
    /** Voice name sent to the synthesizer, e.g. `en-US-Neural2-A`. */
    val identifier: String

    /** Name of the constant; the enum implementations supply it via their built-in `name`. */
    val name: String
}
/**
 * The `en-US-Neural2-*` voices that this project groups as male.
 *
 * @property identifier voice name sent to the synthesizer.
 */
enum class Neural2MaleVoice(override val identifier: String) : VoiceType {
    TYPE_A(identifier = "en-US-Neural2-A"),
    TYPE_D(identifier = "en-US-Neural2-D"),
    TYPE_I(identifier = "en-US-Neural2-I"),
    TYPE_J(identifier = "en-US-Neural2-J"),
}
/**
 * The `en-US-Neural2-*` voices that this project groups as female.
 *
 * @property identifier voice name sent to the synthesizer.
 */
enum class Neural2FemaleVoice(override val identifier: String) : VoiceType {
    TYPE_C(identifier = "en-US-Neural2-C"),
    TYPE_E(identifier = "en-US-Neural2-E"),
    TYPE_F(identifier = "en-US-Neural2-F"),
    TYPE_G(identifier = "en-US-Neural2-G"),
    TYPE_H(identifier = "en-US-Neural2-H"),
}

View File

@ -0,0 +1,91 @@
package com.sangdol.text_to_speech
import io.github.oshai.kotlinlogging.KLogger
import io.github.oshai.kotlinlogging.KotlinLogging
import org.springframework.boot.CommandLineRunner
import org.springframework.context.annotation.Profile
import org.springframework.stereotype.Component
private val log: KLogger = KotlinLogging.logger {}
/**
 * Command-line runner, active only under the `none` profile, that synthesizes
 * two sample dialogues into MP3 files, one file per line of dialogue.
 *
 * Each script is written to its own directory. Both scripts use the same
 * order/voice combinations, so sharing one directory would make the second
 * script overwrite files of the first.
 */
@Profile("none")
@Component
class SampleCreateTestRunner(
    private val singleTextConverter: SingleTextConverter
) : CommandLineRunner {

    /** Converts both sample scripts; [args] is not used. */
    override fun run(vararg args: String?) {
        convertScript(
            directory = "library/sample-scripts-1",
            requests = listOf(
                ConvertSingleTextRequest(
                    text = "Did you find the book you were looking for?",
                    type = Neural2FemaleVoice.TYPE_C,
                    speakingRate = 1.0,
                    order = 1
                ),
                ConvertSingleTextRequest(
                    text = "Yes, it's right here. It's exactly what I need for my project.",
                    type = Neural2MaleVoice.TYPE_A,
                    speakingRate = 1.0,
                    order = 2
                ),
                ConvertSingleTextRequest(
                    text = "Great. Do you need help with anything else?",
                    type = Neural2FemaleVoice.TYPE_E,
                    speakingRate = 1.0,
                    order = 3
                ),
                ConvertSingleTextRequest(
                    text = "Could you show me how to use the copy machine?",
                    type = Neural2MaleVoice.TYPE_D,
                    speakingRate = 1.0,
                    order = 4
                ),
                ConvertSingleTextRequest(
                    text = "Sure, it's over there in the corner. Let me guide you.",
                    type = Neural2FemaleVoice.TYPE_F,
                    speakingRate = 1.0,
                    order = 5
                ),
            )
        )
        convertScript(
            directory = "library/sample-scripts-2",
            requests = listOf(
                ConvertSingleTextRequest(
                    text = "I want to pick up a new hobby, but I'm not sure what to try.",
                    type = Neural2FemaleVoice.TYPE_C,
                    speakingRate = 1.0,
                    order = 1
                ),
                ConvertSingleTextRequest(
                    text = "Have you thought about gardening? ",
                    type = Neural2MaleVoice.TYPE_A,
                    speakingRate = 1.0,
                    order = 2
                ),
                ConvertSingleTextRequest(
                    text = "Gardening? I've never tried it before. What can I grow?",
                    type = Neural2FemaleVoice.TYPE_E,
                    speakingRate = 1.0,
                    order = 3
                ),
                ConvertSingleTextRequest(
                    text = "You can start with some flowers for your balcony.",
                    type = Neural2MaleVoice.TYPE_D,
                    speakingRate = 1.0,
                    order = 4
                ),
            )
        )
    }

    /**
     * Synthesizes every request into `directory/<order>(<gender>-<voice name>).mp3`.
     */
    private fun convertScript(directory: String, requests: List<ConvertSingleTextRequest>) {
        requests.forEach { request ->
            val type: VoiceType = request.type
            val filePath = "$directory/${request.order}(${genderString(type)}-${type.name}).mp3"
            singleTextConverter.convert(request, filePath)
        }
    }

    /** Maps a voice to the gender label used in output file names. */
    private fun genderString(type: VoiceType) = when (type) {
        is Neural2MaleVoice -> "male"
        is Neural2FemaleVoice -> "female"
    }
}

View File

@ -0,0 +1,44 @@
package com.sangdol.text_to_speech
import com.google.cloud.texttospeech.v1.*
import com.google.protobuf.ByteString
import org.springframework.stereotype.Component
import java.io.File
import java.io.FileOutputStream
/**
 * Synthesizes a single text with the Text-to-Speech client and stores the
 * resulting audio in a file.
 *
 * @property ttsClient client used to issue the synthesis request.
 */
@Component
class SingleTextConverter(
    private val ttsClient: TextToSpeechClient
) {
    /**
     * Synthesizes `request.text` with the voice `request.type.identifier` and
     * the speaking rate `request.speakingRate`, then writes the returned audio
     * bytes to [filePath]. Missing parent directories are created.
     *
     * @param request text and voice settings to synthesize.
     * @param filePath destination of the audio file; may be a bare file name.
     * @param language language code sent with the voice selection.
     * @param encodingType audio encoding requested from the service.
     */
    fun convert(
        request: ConvertSingleTextRequest,
        filePath: String,
        language: TtsLanguageCode = TtsLanguageCode.ENGLISH_US,
        encodingType: AudioEncoding = AudioEncoding.MP3
    ) {
        val synthesisInput = SynthesisInput.newBuilder()
            .setText(request.text)
            .build()
        val voiceOptions = VoiceSelectionParams.newBuilder()
            .setLanguageCode(language.code)
            .setName(request.type.identifier)
            .build()
        val audioConfig = AudioConfig.newBuilder()
            .setAudioEncoding(encodingType)
            .setSpeakingRate(request.speakingRate)
            .build()

        val synthesizeResponse: SynthesizeSpeechResponse =
            ttsClient.synthesizeSpeech(synthesisInput, voiceOptions, audioConfig)
        val content: ByteString = synthesizeResponse.audioContent

        val file = File(filePath)
        // parentFile is null when filePath has no directory part; mkdirs() does
        // nothing when the directory already exists.
        file.parentFile?.mkdirs()
        FileOutputStream(file).use { it.write(content.toByteArray()) }
    }
}

View File

@ -4,8 +4,8 @@ import org.springframework.boot.autoconfigure.SpringBootApplication
import org.springframework.boot.runApplication
// NOTE(review): both TextToSpeechApplication and TTSApplication are declared here and the
// annotation directly precedes only the first — presumably the old and new line of a rename
// shown together; confirm which declaration the file really contains.
@SpringBootApplication
class TextToSpeechApplication
class TTSApplication
// Entry point: forwards the command-line arguments to runApplication.
fun main(args: Array<String>) {
// NOTE(review): runApplication is called for both class names — presumably the same
// old/new pair as above; confirm that only one call remains in the real file.
runApplication<TextToSpeechApplication>(*args)
runApplication<TTSApplication>(*args)
}