diff --git a/build.gradle.kts b/build.gradle.kts
index 8405fd6f..c6a792f2 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -79,6 +79,9 @@ dependencies {
     // RestAssured
     testImplementation("io.rest-assured:rest-assured:5.5.5")
     testImplementation("io.rest-assured:kotlin-extensions:5.5.5")
+
+    // etc
+    implementation("org.apache.poi:poi-ooxml:5.2.3")
 }
 
 tasks.withType {
diff --git a/data/population.xlsx b/data/population.xlsx
new file mode 100644
index 00000000..af8c2e8c
Binary files /dev/null and b/data/population.xlsx differ
diff --git a/src/test/kotlin/roomescape/data/DataParser.kt b/src/test/kotlin/roomescape/data/DataParser.kt
new file mode 100644
index 00000000..1b7ed25f
--- /dev/null
+++ b/src/test/kotlin/roomescape/data/DataParser.kt
@@ -0,0 +1,119 @@
+package roomescape.data
+
+import io.kotest.core.spec.style.StringSpec
+import org.apache.poi.xssf.usermodel.XSSFWorkbook
+import java.io.File
+
+/**
+ * One region row read from the population spreadsheet.
+ *
+ * [sidoCode] and [sigunguCode] are characters 0-1 and 2-4 of the ten-digit [code];
+ * [guName] is empty when the region name has only two parts.
+ */
+private data class RegionRow(
+    val code: String,
+    val sidoCode: String,
+    val sigunguCode: String,
+    val sidoName: String,
+    val sigunguName: String,
+    val guName: String,
+    val population: Int,
+) {
+    /** Grouping key for rows that share both the sido name and the sigungu name. */
+    val sigunguKey: Pair<String, String> get() = sidoName to sigunguName
+}
+
+/** Wraps the receiver in single quotes, doubling embedded quotes so it stays a valid SQL literal. */
+private fun String.toSqlLiteral(): String = "'${replace("'", "''")}'"
+
+/**
+ * Generates region data files from `data/population.xlsx`.
+ *
+ * Running the spec writes two files:
+ * - `data/region.sql`: a single INSERT statement built from the parsed region rows.
+ * - `data/region_population.txt`: one comma-separated line per region above the population threshold.
+ */
+class PopulationDataSqlParser() : StringSpec({
+
+    val regionCodePattern = Regex("^[0-9]{10}$")
+    val populationPattern = Regex("^[0-9]+$")
+    val whitespace = Regex("\\s+")
+    val minPopulation = 250_000
+
+    "인구 데이터를 이용하여 지역 정보 SQL 파일로 변환하고, 추가로 $minPopulation 이상의 시/군/구는 매장 데이터 생성을 위해 따로 분류한다." {
+        // The workbook is loaded from a stream and both handles are closed with use().
+        // Rows are materialised with toList() inside the block because the row sequence is lazy.
+        val regions = File("data/population.xlsx").inputStream().use { input ->
+            XSSFWorkbook(input).use { workbook ->
+                workbook.getSheetAt(0).rowIterator().asSequence().mapNotNull { row ->
+                    // Column 0: region code. Anything that is not exactly ten digits is skipped.
+                    val code = row.getCell(0)?.stringCellValue ?: return@mapNotNull null
+                    if (!regionCodePattern.matches(code)) return@mapNotNull null
+
+                    // Column 2: population text; commas are removed before the digit check.
+                    val populationText = row.getCell(2)?.stringCellValue?.replace(",", "")
+                        ?: return@mapNotNull null
+                    if (!populationPattern.matches(populationText)) return@mapNotNull null
+
+                    // Column 1: "<sido> <sigungu> [<gu>]". Rows with fewer than two parts are skipped.
+                    val nameParts = row.getCell(1)?.stringCellValue?.trim()?.split(whitespace)
+                        ?: return@mapNotNull null
+                    if (nameParts.size < 2) return@mapNotNull null
+
+                    // toIntOrNull skips digit strings too large for Int instead of throwing.
+                    val population = populationText.toIntOrNull() ?: return@mapNotNull null
+                    if (population <= 0) return@mapNotNull null
+
+                    RegionRow(
+                        code = code,
+                        sidoCode = code.substring(0, 2),
+                        sigunguCode = code.substring(2, 5),
+                        sidoName = nameParts[0],
+                        sigunguName = nameParts[1],
+                        guName = nameParts.getOrElse(2) { "" },
+                        population = population,
+                    )
+                }.toList()
+            }
+        }
+
+        // NOTE(review): the spec name says "이상" (at or above) but this comparison is strict; confirm intent.
+        val largeRegions = regions.filter { it.population > minPopulation }
+        val largeRegionCounts = largeRegions.groupingBy { it.sigunguKey }.eachCount()
+
+        largeRegions
+            // When several rows of one sigungu qualify, drop the row that has no gu name.
+            // The key includes the sido name so same-named sigungu under different sido do not collide.
+            .filter { it.guName.isNotBlank() || largeRegionCounts.getValue(it.sigunguKey) <= 1 }
+            .joinToString(separator = "\n") { region ->
+                val guName = if (region.guName.isBlank()) "NULL" else "'${region.guName}'"
+                listOf(
+                    region.code,
+                    region.sidoCode,
+                    region.sigunguCode,
+                    region.sidoName,
+                    region.sigunguName,
+                    guName,
+                    region.population,
+                ).joinToString(separator = ", ")
+            }
+            .also { File("data/region_population.txt").writeText(it) }
+
+        val lowestSigunguCodes = regions
+            .groupBy { it.sigunguKey }
+            .mapValues { (_, rows) -> rows.minOf { it.sigunguCode.toInt() } }
+
+        regions
+            .distinctBy { it.sidoCode to it.sigunguCode }
+            // Among rows sharing a sido/sigungu name, keep only the one with the lowest sigungu code.
+            .filter { it.sigunguCode.toInt() == lowestSigunguCodes.getValue(it.sigunguKey) }
+            .joinToString(
+                prefix = "INSERT INTO region(code, sido_code, sigungu_code, sido_name, sigungu_name) VALUES ",
+                separator = ",\n",
+            ) { region ->
+                listOf(region.code, region.sidoCode, region.sigunguCode, region.sidoName, region.sigunguName)
+                    .joinToString(prefix = "(", separator = ", ", postfix = ")") { it.toSqlLiteral() }
+            }
+            .also { File("data/region.sql").writeText("$it;") }
+    }
+})