Compare commits
No commits in common. "def057d8e970ec989433bdf4d0ff41e3c72ebec4" and "136eca180b5ea128e6f15d443c015b96157ef540" have entirely different histories.
def057d8e9
...
136eca180b
3
.gitignore
vendored
3
.gitignore
vendored
@ -129,5 +129,4 @@ replay_pid*
|
|||||||
|
|
||||||
# Kotlin Gradle plugin data, see https://kotlinlang.org/docs/whatsnew20.html#new-directory-for-kotlin-data-in-gradle-projects
|
# Kotlin Gradle plugin data, see https://kotlinlang.org/docs/whatsnew20.html#new-directory-for-kotlin-data-in-gradle-projects
|
||||||
.kotlin/
|
.kotlin/
|
||||||
gcp/
|
gcp/
|
||||||
library
|
|
||||||
@ -20,28 +20,31 @@ repositories {
|
|||||||
mavenCentral()
|
mavenCentral()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extra["springCloudGcpVersion"] = "7.1.0"
|
||||||
|
extra["springCloudVersion"] = "2025.0.0"
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
implementation("org.springframework.boot:spring-boot-starter-data-jpa")
|
implementation("org.springframework.boot:spring-boot-starter-data-jpa")
|
||||||
implementation("org.springframework.boot:spring-boot-starter-mail")
|
implementation("org.springframework.boot:spring-boot-starter-mail")
|
||||||
implementation("org.springframework.boot:spring-boot-starter-validation")
|
implementation("org.springframework.boot:spring-boot-starter-validation")
|
||||||
implementation("org.springframework.boot:spring-boot-starter-web")
|
implementation("org.springframework.boot:spring-boot-starter-web")
|
||||||
|
|
||||||
implementation("com.fasterxml.jackson.module:jackson-module-kotlin")
|
implementation("com.fasterxml.jackson.module:jackson-module-kotlin")
|
||||||
implementation("io.github.oshai:kotlin-logging-jvm:7.0.3")
|
implementation("com.google.cloud:spring-cloud-gcp-starter")
|
||||||
implementation("org.jetbrains.kotlin:kotlin-reflect")
|
implementation("org.jetbrains.kotlin:kotlin-reflect")
|
||||||
|
|
||||||
implementation("com.google.cloud:google-cloud-texttospeech:2.68.0")
|
implementation("com.google.cloud:google-cloud-texttospeech:2.68.0")
|
||||||
implementation("org.apache.poi:poi:5.4.1")
|
implementation("org.apache.poi:poi:5.4.1")
|
||||||
implementation("net.bramp.ffmpeg:ffmpeg:0.8.0")
|
|
||||||
|
|
||||||
runtimeOnly("com.mysql:mysql-connector-j")
|
runtimeOnly("com.mysql:mysql-connector-j")
|
||||||
runtimeOnly("com.h2database:h2")
|
|
||||||
|
|
||||||
testRuntimeOnly("org.junit.platform:junit-platform-launcher")
|
|
||||||
testImplementation("org.springframework.boot:spring-boot-starter-test")
|
testImplementation("org.springframework.boot:spring-boot-starter-test")
|
||||||
testImplementation("org.jetbrains.kotlin:kotlin-test-junit5")
|
testImplementation("org.jetbrains.kotlin:kotlin-test-junit5")
|
||||||
testImplementation("io.kotest:kotest-runner-junit5:5.9.1")
|
runtimeOnly("com.h2database:h2")
|
||||||
testImplementation("io.kotest.extensions:kotest-extensions-spring:1.3.0")
|
testRuntimeOnly("org.junit.platform:junit-platform-launcher")
|
||||||
|
}
|
||||||
|
|
||||||
|
dependencyManagement {
|
||||||
|
imports {
|
||||||
|
mavenBom("com.google.cloud:spring-cloud-gcp-dependencies:${property("springCloudGcpVersion")}")
|
||||||
|
mavenBom("org.springframework.cloud:spring-cloud-dependencies:${property("springCloudVersion")}")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
kotlin {
|
kotlin {
|
||||||
|
|||||||
@ -1,8 +0,0 @@
|
|||||||
package com.sangdol.text_to_speech
|
|
||||||
|
|
||||||
/**
 * One line of text to be synthesized into speech.
 *
 * @property text the text handed to the synthesizer.
 * @property type the voice used to speak the text.
 * @property speakingRate the speaking rate forwarded to the audio configuration.
 * @property order the position of this line within its script.
 */
data class ConvertSingleTextRequest(
    val text: String,
    val type: VoiceType,
    val speakingRate: Double,
    val order: Int,
)
|
|
||||||
@ -1,94 +0,0 @@
|
|||||||
package com.sangdol.text_to_speech
|
|
||||||
|
|
||||||
import net.bramp.ffmpeg.FFmpeg
|
|
||||||
import net.bramp.ffmpeg.FFmpegExecutor
|
|
||||||
import net.bramp.ffmpeg.builder.FFmpegBuilder
|
|
||||||
import org.springframework.stereotype.Component
|
|
||||||
import java.io.File
|
|
||||||
|
|
||||||
/** Path of the ffmpeg executable launched by this file. */
private const val FFMPEG_PATH: String = "/opt/homebrew/bin/ffmpeg"

/** Directory that holds the generated silence clips. */
private const val SILENCE_AUDIO_PATH: String = "library/silences"

/**
 * Returns the path of the silence clip for the given duration.
 *
 * @param durationMs length of the silence in milliseconds; it becomes part of the file name.
 */
private fun convertSilenceAudioFilePath(durationMs: Long): String {
    return "$SILENCE_AUDIO_PATH/silence-${durationMs}ms.mp3"
}
|
|
||||||
|
|
||||||
/**
 * Audio helpers built on the ffmpeg command line tool.
 *
 * Silence clips are generated by launching ffmpeg directly; merging goes through
 * the injected [FFmpegExecutor].
 */
@Component
class FfmpegUtils(
    private val ffmpegExecutor: FFmpegExecutor = FFmpegExecutor(FFmpeg(FFMPEG_PATH))
) {

    /**
     * Generates a silent MP3 of [durationMs] milliseconds at the path returned by
     * [convertSilenceAudioFilePath], overwriting an existing file (`-y`).
     *
     * @throws RuntimeException if ffmpeg exits with a non-zero exit code.
     */
    fun createSilenceAudio(durationMs: Long) {
        val durationSeconds = durationMs / 1000.0
        val outputFile = convertSilenceAudioFilePath(durationMs)

        // ffmpeg does not create missing output directories, so make sure it exists.
        File(outputFile).parentFile?.mkdirs()

        val command = listOf(
            FFMPEG_PATH,
            "-y",
            "-f", "lavfi",
            "-i", "anullsrc=r=44100:cl=stereo",
            "-t", durationSeconds.toString(),
            "-acodec", "mp3",
            outputFile
        )

        val process = ProcessBuilder(command)
            .redirectErrorStream(true)
            .start()

        // Drain the merged stdout/stderr before waiting so ffmpeg cannot block on a full pipe.
        process.inputStream.bufferedReader().forEachLine { println(it) }

        // Fail on a NON-zero exit code. The previous takeUnless/elvis chain was inverted:
        // it threw on success (exit code 0) and let failures pass silently.
        val exitCode = process.waitFor()
        if (exitCode != 0) {
            throw RuntimeException("FFmpeg failed with exit code $exitCode")
        }
    }

    /**
     * Concatenates every file in [targetDirectory] into `saveDirectory/fileName`,
     * ordered by the first run of digits found in each file name. When [intervalMs]
     * is positive, a silence clip of that length is inserted between consecutive
     * files (created on demand if it does not exist yet).
     *
     * @throws IllegalArgumentException if [targetDirectory] is missing, not a directory, or empty.
     * @throws IllegalStateException if a file name contains no digits.
     */
    fun mergeAudioWithInterval(intervalMs: Long, targetDirectory: String, saveDirectory: String, fileName: String) {
        val targetDir = File(targetDirectory).also { validateIsValidFileDirectory(it) }
        val fileNameRegex = """(\d+)(.*)""".toRegex()
        val silencePath = convertSilenceAudioFilePath(intervalMs)

        if (intervalMs > 0 && !File(silencePath).exists()) {
            createSilenceAudio(intervalMs)
        }

        // listFiles() returns null on an I/O error; fail with a clear message instead of an NPE.
        val entries = requireNotNull(targetDir.listFiles()) { "Directory is null or empty: $targetDir" }

        val orderedPaths = entries
            .sortedBy { entry ->
                val match = fileNameRegex.find(entry.name)
                    ?: throw IllegalStateException("Invalid filename-convention")
                // Group 1 always participates in a successful match, so groupValues[1] is the digits.
                match.groupValues[1].toInt()
            }
            .map { it.path }

        // Interleave the silence clip between inputs, but not after the last one.
        val inputs = buildList {
            orderedPaths.forEachIndexed { idx, path ->
                add(path)
                if (intervalMs > 0L && idx != orderedPaths.lastIndex) {
                    add(silencePath)
                }
            }
        }

        val builder = FFmpegBuilder()
        inputs.forEach { filePath -> builder.addInput(filePath) }

        val filterInputs = inputs.indices.joinToString("") { idx -> "[$idx:a]" }
        val filterComplex = "$filterInputs concat=n=${inputs.size}:v=0:a=1[out]"

        builder.setComplexFilter(filterComplex)
            .addOutput("$saveDirectory/$fileName")
            .setAudioCodec("mp3")
            .addExtraArgs("-map", "[out]")
            .done()

        ffmpegExecutor.createJob(builder).run()
    }

    /**
     * Verifies that [dir] exists, is a directory, and contains at least one entry.
     *
     * @throws IllegalArgumentException if any of these checks fails.
     */
    private fun validateIsValidFileDirectory(dir: File) {
        require(dir.exists()) { "Directory not found: $dir" }
        require(dir.isDirectory) { "Path is not a directory: $dir" }

        val fileList = dir.listFiles()
        require(fileList != null && fileList.isNotEmpty()) { "Directory is null or empty: $dir" }
    }
}
|
|
||||||
@ -1,44 +0,0 @@
|
|||||||
package com.sangdol.text_to_speech
|
|
||||||
|
|
||||||
import com.google.cloud.texttospeech.v1.TextToSpeechClient
|
|
||||||
import org.springframework.context.annotation.Bean
|
|
||||||
import org.springframework.context.annotation.Configuration
|
|
||||||
|
|
||||||
/** Spring configuration that registers the Google Cloud Text-to-Speech client as a bean. */
@Configuration
class GoogleTtsConfig {

    /** Builds the client with [TextToSpeechClient.create]. */
    @Bean
    fun textToSpeechClient(): TextToSpeechClient {
        return TextToSpeechClient.create()
    }
}
|
|
||||||
|
|
||||||
/**
 * Language codes that can be sent with a synthesis request.
 *
 * @property code the language tag string passed to the voice selection parameters.
 */
enum class TtsLanguageCode(val code: String) {
    ENGLISH_US("en-US"),
}
|
|
||||||
|
|
||||||
/**
 * A selectable synthesis voice.
 *
 * The implementations are enums, so [name] is satisfied by the enum constant name.
 */
sealed interface VoiceType {
    /** Voice name sent to the synthesis API, e.g. `en-US-Neural2-A`. */
    val identifier: String

    /** Name of the constant, e.g. `TYPE_A`. */
    val name: String
}

/** Male `en-US` Neural2 voices. */
enum class Neural2MaleVoice(override val identifier: String) : VoiceType {
    TYPE_A("en-US-Neural2-A"),
    TYPE_D("en-US-Neural2-D"),
    TYPE_I("en-US-Neural2-I"),
    TYPE_J("en-US-Neural2-J"),
    ;
}

/** Female `en-US` Neural2 voices. */
enum class Neural2FemaleVoice(override val identifier: String) : VoiceType {
    TYPE_C("en-US-Neural2-C"),
    TYPE_E("en-US-Neural2-E"),
    TYPE_F("en-US-Neural2-F"),
    TYPE_G("en-US-Neural2-G"),
    TYPE_H("en-US-Neural2-H"),
    ;
}
|
|
||||||
@ -1,91 +0,0 @@
|
|||||||
package com.sangdol.text_to_speech
|
|
||||||
|
|
||||||
import io.github.oshai.kotlinlogging.KLogger
|
|
||||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
|
||||||
import org.springframework.boot.CommandLineRunner
|
|
||||||
import org.springframework.context.annotation.Profile
|
|
||||||
import org.springframework.stereotype.Component
|
|
||||||
|
|
||||||
private val log: KLogger = KotlinLogging.logger {}
|
|
||||||
|
|
||||||
/**
 * Startup runner that synthesizes two sample dialogue scripts into MP3 files.
 *
 * It is only registered when the Spring profile `none` is active.
 */
@Profile("none")
@Component
class SampleCreateTestRunner(
    private val singleTextConverter: SingleTextConverter
) : CommandLineRunner {

    /** Converts both sample scripts, each into its own output directory. */
    override fun run(vararg args: String?) {
        convertScript(
            directory = "library/sample-scripts-1",
            lines = listOf(
                ConvertSingleTextRequest(
                    text = "Did you find the book you were looking for?",
                    type = Neural2FemaleVoice.TYPE_C,
                    speakingRate = 1.0,
                    order = 1
                ),
                ConvertSingleTextRequest(
                    text = "Yes, it’s right here. It’s exactly what I need for my project.",
                    type = Neural2MaleVoice.TYPE_A,
                    speakingRate = 1.0,
                    order = 2
                ),
                ConvertSingleTextRequest(
                    text = "Great. Do you need help with anything else?",
                    type = Neural2FemaleVoice.TYPE_E,
                    speakingRate = 1.0,
                    order = 3
                ),
                ConvertSingleTextRequest(
                    text = "Could you show me how to use the copy machine?",
                    type = Neural2MaleVoice.TYPE_D,
                    speakingRate = 1.0,
                    order = 4
                ),
                ConvertSingleTextRequest(
                    text = "Sure, it’s over there in the corner. Let me guide you.",
                    type = Neural2FemaleVoice.TYPE_F,
                    speakingRate = 1.0,
                    order = 5
                ),
            )
        )

        // The second script previously wrote to "library/sample-scripts-1" as well. Its file
        // names for orders 1-4 are identical to the first script's, so it overwrote them.
        convertScript(
            directory = "library/sample-scripts-2",
            lines = listOf(
                ConvertSingleTextRequest(
                    text = "I want to pick up a new hobby, but I’m not sure what to try.",
                    type = Neural2FemaleVoice.TYPE_C,
                    speakingRate = 1.0,
                    order = 1
                ),
                ConvertSingleTextRequest(
                    text = "Have you thought about gardening? ",
                    type = Neural2MaleVoice.TYPE_A,
                    speakingRate = 1.0,
                    order = 2
                ),
                ConvertSingleTextRequest(
                    text = "Gardening? I’ve never tried it before. What can I grow?",
                    type = Neural2FemaleVoice.TYPE_E,
                    speakingRate = 1.0,
                    order = 3
                ),
                ConvertSingleTextRequest(
                    text = "You can start with some flowers for your balcony.",
                    type = Neural2MaleVoice.TYPE_D,
                    speakingRate = 1.0,
                    order = 4
                ),
            )
        )
    }

    /**
     * Synthesizes every request in [lines] to `<directory>/<order>(<gender>-<voice name>).mp3`.
     */
    private fun convertScript(directory: String, lines: List<ConvertSingleTextRequest>) {
        lines.forEach { line ->
            val type: VoiceType = line.type
            val filePath = "$directory/${line.order}(${genderString(type)}-${type.name}).mp3"
            singleTextConverter.convert(line, filePath)
        }
    }

    /** Maps a voice to the gender label used in output file names. */
    private fun genderString(type: VoiceType) = when (type) {
        is Neural2MaleVoice -> "male"
        is Neural2FemaleVoice -> "female"
    }
}
|
|
||||||
@ -1,44 +0,0 @@
|
|||||||
package com.sangdol.text_to_speech
|
|
||||||
|
|
||||||
import com.google.cloud.texttospeech.v1.*
|
|
||||||
import com.google.protobuf.ByteString
|
|
||||||
import org.springframework.stereotype.Component
|
|
||||||
import java.io.File
|
|
||||||
import java.io.FileOutputStream
|
|
||||||
|
|
||||||
/**
 * Synthesizes a single piece of text with the Text-to-Speech client and writes
 * the resulting audio to a file.
 */
@Component
class SingleTextConverter(
    private val ttsClient: TextToSpeechClient
) {
    /**
     * Synthesizes [request] and writes the audio bytes to [filePath].
     *
     * Missing parent directories of [filePath] are created. An existing file at
     * [filePath] is overwritten.
     *
     * @param request text, voice and speaking rate to synthesize.
     * @param filePath destination of the audio file.
     * @param language language code sent with the voice selection.
     * @param encodingType audio encoding requested from the API.
     */
    fun convert(
        request: ConvertSingleTextRequest,
        filePath: String,
        language: TtsLanguageCode = TtsLanguageCode.ENGLISH_US,
        encodingType: AudioEncoding = AudioEncoding.MP3
    ) {
        val synthesisInput = SynthesisInput.newBuilder()
            .setText(request.text)
            .build()

        val voiceOptions = VoiceSelectionParams.newBuilder()
            .setLanguageCode(language.code)
            .setName(request.type.identifier)
            .build()

        val audioConfig = AudioConfig.newBuilder()
            .setAudioEncoding(encodingType)
            .setSpeakingRate(request.speakingRate)
            .build()

        val synthesizeResponse: SynthesizeSpeechResponse =
            ttsClient.synthesizeSpeech(synthesisInput, voiceOptions, audioConfig)
        val content: ByteString = synthesizeResponse.audioContent

        val file = File(filePath)
        // parentFile is null when filePath has no directory component (e.g. "out.mp3");
        // the previous takeIf { !dir.exists() } dereferenced it and threw an NPE in that case.
        // mkdirs() is a no-op when the directory already exists.
        file.parentFile?.mkdirs()

        FileOutputStream(file).use { it.write(content.toByteArray()) }
    }
}
|
|
||||||
@ -4,8 +4,8 @@ import org.springframework.boot.autoconfigure.SpringBootApplication
|
|||||||
import org.springframework.boot.runApplication
|
import org.springframework.boot.runApplication
|
||||||
|
|
||||||
@SpringBootApplication
|
@SpringBootApplication
|
||||||
class TTSApplication
|
class TextToSpeechApplication
|
||||||
|
|
||||||
fun main(args: Array<String>) {
|
fun main(args: Array<String>) {
|
||||||
runApplication<TTSApplication>(*args)
|
runApplication<TextToSpeechApplication>(*args)
|
||||||
}
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user