feat: Apache POI gradle 의존성 및 행정안전부 인구현황 파일 전처리 유틸 클래스 추가

This commit is contained in:
이상진 2025-09-14 14:52:20 +09:00
parent 116dd24e26
commit 7530e1038e
3 changed files with 93 additions and 0 deletions

View File

@ -79,6 +79,9 @@ dependencies {
// RestAssured
testImplementation("io.rest-assured:rest-assured:5.5.5")
testImplementation("io.rest-assured:kotlin-extensions:5.5.5")
// etc
implementation("org.apache.poi:poi-ooxml:5.2.3")
}
tasks.withType<Test> {

BIN
data/population.xlsx Normal file

Binary file not shown.

View File

@ -0,0 +1,90 @@
package roomescape.data
import io.kotest.core.spec.style.StringSpec
import org.apache.poi.xssf.usermodel.XSSFWorkbook
import java.io.File
/**
 * One-off data-preparation spec that converts the population workbook at
 * `data/population.xlsx` into two generated files:
 *
 * - `data/region.sql`: a single multi-row `INSERT` into `region`, one row per
 *   (sido code, sigungu code) pair.
 * - `data/region_population.txt`: comma-separated lines for the regions whose
 *   population is greater than `minPopulation`.
 *
 * Only the first sheet is read. Column 0 is read as the region code, column 1 as the
 * region name and column 2 as the population; all three are read as string cells.
 * Rows that do not look like data rows (bad code, non-numeric or non-positive
 * population, name without at least two space-separated parts) are skipped.
 */
class PopulationDataSqlParser() : StringSpec({
    val regionCodePattern = Regex("^[0-9]{10}$")
    val digitsPattern = Regex("^[0-9]+$")
    val whitespacePattern = Regex("\\s+")
    val minPopulation = 250_000

    // NOTE(review): the spec name says "이상" (at least) while the comparison below is a
    // strict `>`; the strict comparison is kept as-is. Confirm which one is intended.
    "인구 데이터를 이용하여 지역 정보 SQL 파일로 변환하고, 추가로 $minPopulation 이상의 시/군/구는 매장 데이터 생성을 위해 따로 분류한다." {
        val allRegions = mutableListOf<RegionRecord>()

        // The workbook is read through a stream and closed deterministically, so no file
        // handle is leaked and closing the workbook never touches the source file.
        File("data/population.xlsx").inputStream().use { input ->
            XSSFWorkbook(input).use { workbook ->
                for (row in workbook.getSheetAt(0)) {
                    val regionCode = row.getCell(0)?.stringCellValue ?: continue
                    if (!regionCodePattern.matches(regionCode)) continue

                    // Population is stored as text with thousands separators.
                    val populationText = row.getCell(2)?.stringCellValue?.replace(",", "") ?: continue
                    if (!digitsPattern.matches(populationText)) continue
                    val population = populationText.toIntOrNull() ?: continue
                    if (population <= 0) continue

                    // Trim before splitting and split on whitespace runs, so padding around
                    // or between the name parts cannot produce empty name components.
                    val nameParts = row.getCell(1)?.stringCellValue
                        ?.trim()
                        ?.split(whitespacePattern)
                        ?: continue
                    // A single-part name carries no sigungu component; skip it.
                    if (nameParts.size < 2) continue

                    allRegions += RegionRecord(
                        code = regionCode,
                        sidoCode = regionCode.substring(0, 2),
                        sigunguCode = regionCode.substring(2, 5),
                        sidoName = nameParts[0],
                        sigunguName = nameParts[1],
                        guName = nameParts.getOrElse(2) { "" },
                        population = population,
                    )
                }
            }
        }

        // --- data/region_population.txt -------------------------------------------------
        val largeRegions = allRegions.filter { it.population > minPopulation }
        // Rows are grouped by sido AND sigungu name: identically named districts exist in
        // several sidos and must not be counted as the same city.
        val largeRowsPerCity = largeRegions.groupingBy { it.sidoName to it.sigunguName }.eachCount()

        largeRegions
            // Drop the row without a gu name when other rows of the same sido/sigungu
            // are present as well.
            .filterNot { it.guName.isBlank() && largeRowsPerCity.getValue(it.sidoName to it.sigunguName) > 1 }
            .joinToString(separator = "\n") { region ->
                val guName = if (region.guName.isBlank()) "NULL" else "'${region.guName}'"
                "${region.code}, ${region.sidoCode}, ${region.sigunguCode}, " +
                    "${region.sidoName}, ${region.sigunguName}, $guName, ${region.population}"
            }
            .also { File("data/region_population.txt").writeText(it) }

        // --- data/region.sql ------------------------------------------------------------
        // For every sido/sigungu name pair, remember the numerically smallest sigungu code;
        // only the row carrying that code is emitted for the pair.
        val lowestSigunguCodePerCity = allRegions
            .groupBy { it.sidoName to it.sigunguName }
            .mapValues { (_, rows) -> rows.minOf { it.sigunguCode.toInt() } }

        allRegions
            .distinctBy { it.sidoCode to it.sigunguCode }
            .filter { it.sigunguCode.toInt() == lowestSigunguCodePerCity.getValue(it.sidoName to it.sigunguName) }
            .joinToString(
                prefix = "INSERT INTO region(code, sido_code, sigungu_code, sido_name, sigungu_name) VALUES ",
                separator = ",\n"
            ) { region ->
                "('${region.code}', '${region.sidoCode}', '${region.sigunguCode}', " +
                    "'${region.sidoName}', '${region.sigunguName}')"
            }
            .also { File("data/region.sql").writeText("$it;") }
    }
})

/**
 * One accepted workbook row.
 *
 * [sidoCode] is characters 0..1 of [code] and [sigunguCode] is characters 2..4.
 * [guName] is the third whitespace-separated part of the region name, or an empty
 * string when the name has only two parts.
 */
private data class RegionRecord(
    val code: String,
    val sidoCode: String,
    val sigunguCode: String,
    val sidoName: String,
    val sigunguName: String,
    val guName: String,
    val population: Int,
)