Skip to content

Replace Klaxon with kotlinx-serialization #603

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ plugins {
with(libs.plugins) {
alias(kotlin.jvm)
alias(publisher)
alias(serialization)
alias(serialization) apply false
alias(jupyter.api) apply false
alias(dokka)
alias(kover)
Expand Down Expand Up @@ -71,8 +71,6 @@ fun String.findVersion(): Version {

// these names of outdated dependencies will not show up in the table output
val dependencyUpdateExclusions = listOf(
// 5.6 requires Java 11
libs.klaxon.get().name,
// TODO Requires more work to be updated to 1.7.0+, https://github.com/Kotlin/dataframe/issues/594
libs.plugins.kover.get().pluginId,
// TODO Updating requires major changes all across the project, https://github.com/Kotlin/dataframe/issues/364
Expand Down
4 changes: 3 additions & 1 deletion core/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ dependencies {
implementation(libs.kotlin.stdlib.jdk8)

api(libs.commonsCsv)
implementation(libs.klaxon)
implementation(libs.serialization.core)
implementation(libs.serialization.json)

implementation(libs.fuel)

api(libs.kotlin.datetimeJvm)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,20 @@
package org.jetbrains.kotlinx.dataframe.impl.io

import com.beust.klaxon.JsonArray
import com.beust.klaxon.JsonObject
import kotlinx.serialization.json.JsonArray
import kotlinx.serialization.json.JsonNull
import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.JsonPrimitive
import kotlinx.serialization.json.boolean
import kotlinx.serialization.json.booleanOrNull
import kotlinx.serialization.json.double
import kotlinx.serialization.json.doubleOrNull
import kotlinx.serialization.json.float
import kotlinx.serialization.json.floatOrNull
import kotlinx.serialization.json.int
import kotlinx.serialization.json.intOrNull
import kotlinx.serialization.json.jsonArray
import kotlinx.serialization.json.long
import kotlinx.serialization.json.longOrNull
import org.jetbrains.kotlinx.dataframe.AnyCol
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.DataColumn
Expand Down Expand Up @@ -73,8 +86,8 @@ internal fun readJson(
val df: AnyFrame = when (typeClashTactic) {
ARRAY_AND_VALUE_COLUMNS -> {
when (parsed) {
is JsonArray<*> -> fromJsonListArrayAndValueColumns(
records = parsed.value,
is JsonArray -> fromJsonListArrayAndValueColumns(
records = parsed,
header = header,
keyValuePaths = keyValuePaths,
)
Expand All @@ -88,8 +101,8 @@ internal fun readJson(

ANY_COLUMNS -> {
when (parsed) {
is JsonArray<*> -> fromJsonListAnyColumns(
records = parsed.value,
is JsonArray -> fromJsonListAnyColumns(
records = parsed,
header = header,
keyValuePaths = keyValuePaths,
)
Expand Down Expand Up @@ -126,18 +139,16 @@ internal fun fromJsonListAnyColumns(

// list element type can be JsonObject, JsonArray or primitive
val nameGenerator = ColumnNameGenerator()
records.forEach {
when (it) {
records.forEach { record ->
when (record) {
is JsonObject -> {
hasObject = true
it.entries.forEach {
nameGenerator.addIfAbsent(it.key)
}
record.entries.forEach { nameGenerator.addIfAbsent(it.key) }
}

is JsonArray<*> -> hasArray = true
null -> Unit
else -> hasPrimitive = true
is JsonArray -> hasArray = true
is JsonNull, null -> Unit
is JsonPrimitive -> hasPrimitive = true
}
}

Expand Down Expand Up @@ -177,7 +188,7 @@ internal fun fromJsonListAnyColumns(
)
}

is JsonArray<*> -> {
is JsonArray -> {
val parsed = fromJsonListAnyColumns(
records = v,
keyValuePaths = keyValuePaths,
Expand All @@ -189,9 +200,22 @@ internal fun fromJsonListAnyColumns(
)
}

"NaN" -> {
nanIndices.add(i)
collector.add(null)
is JsonNull -> collector.add(null)

is JsonPrimitive -> {
when {
v.content == "NaN" -> {
nanIndices.add(i)
collector.add(null)
}

v.isString -> collector.add(v.content)
v.booleanOrNull != null -> collector.add(v.boolean)
v.intOrNull != null -> collector.add(v.int)
v.longOrNull != null -> collector.add(v.long)
v.doubleOrNull != null -> collector.add(v.double)
v.floatOrNull != null -> collector.add(v.float)
}
}

else -> collector.add(v)
Expand Down Expand Up @@ -227,8 +251,8 @@ internal fun fromJsonListAnyColumns(
records.forEach {
startIndices.add(values.size)
when (it) {
is JsonArray<*> -> values.addAll(it.value)
null -> Unit
is JsonArray -> values.addAll(it)
is JsonNull, null -> Unit
else -> error("Expected JsonArray, got $it")
}
}
Expand All @@ -242,10 +266,10 @@ internal fun fromJsonListAnyColumns(
parsed.isSingleUnnamedColumn() -> {
val col = (parsed.getColumn(0) as UnnamedColumn).col
val elementType = col.type
val values = col.values.asList().splitByIndices(startIndices.asSequence()).toList()
val columnValues = col.values.asList().splitByIndices(startIndices.asSequence()).toList()
DataColumn.createValueColumn(
name = arrayColumnName,
values = values,
values = columnValues,
type = List::class.createType(listOf(KTypeProjection.invariant(elementType))),
)
}
Expand All @@ -263,10 +287,10 @@ internal fun fromJsonListAnyColumns(
colType == AnyColType.OBJECTS && isKeyValue -> {
// collect the value types to make sure Value columns with lists and other values aren't all turned into lists
val valueTypes = mutableSetOf<KType>()
val dataFrames = records.map {
when (it) {
val dataFrames = records.map { record ->
when (record) {
is JsonObject -> {
val map = it.map.mapValues { (key, value) ->
val map = record.mapValues { (key, value) ->
val parsed = fromJsonListAnyColumns(
records = listOf(value),
keyValuePaths = keyValuePaths,
Expand All @@ -288,8 +312,8 @@ internal fun fromJsonListAnyColumns(
)
}

null -> DataFrame.emptyOf<AnyKeyValueProperty>()
else -> error("Expected JsonObject, got $it")
is JsonNull, null -> DataFrame.emptyOf<AnyKeyValueProperty>()
else -> error("Expected JsonObject, got $record")
}
}

Expand Down Expand Up @@ -328,7 +352,7 @@ internal fun fromJsonListAnyColumns(
records.forEach {
when (it) {
is JsonObject -> values.add(it[colName])
null -> values.add(null)
is JsonNull, null -> values.add(null)
else -> error("Expected JsonObject, got $it")
}
}
Expand Down Expand Up @@ -401,18 +425,18 @@ internal fun fromJsonListArrayAndValueColumns(
// { "array": [], "value": 123, "a": null, "b": null }

val nameGenerator = ColumnNameGenerator()
records.forEach {
when (it) {
is JsonObject -> it.entries.forEach {
records.forEach { record ->
when (record) {
is JsonObject -> record.entries.forEach {
nameGenerator.addIfAbsent(it.key)
}

is JsonArray<*> -> hasArray = true
null -> Unit
else -> hasPrimitive = true
is JsonArray -> hasArray = true
is JsonNull, null -> Unit
is JsonPrimitive -> hasPrimitive = true
}
}
if (records.all { it == null }) hasPrimitive = true
if (records.all { it == null || it is JsonNull }) hasPrimitive = true

// Add a value column to the collected names if needed
val valueColumn = if (hasPrimitive || records.isEmpty()) {
Expand All @@ -433,10 +457,10 @@ internal fun fromJsonListArrayAndValueColumns(
val columns: List<AnyCol> = when {
// instead of using the names, generate a single key/value frame column
isKeyValue -> {
val dataFrames = records.map {
when (it) {
val dataFrames = records.map { record ->
when (record) {
is JsonObject -> {
val map = it.map.mapValues { (key, value) ->
val map = record.mapValues { (key, value) ->
val parsed = fromJsonListArrayAndValueColumns(
records = listOf(value),
keyValuePaths = keyValuePaths,
Expand All @@ -459,8 +483,8 @@ internal fun fromJsonListArrayAndValueColumns(
)
}

null -> DataFrame.emptyOf<AnyKeyValueProperty>()
else -> error("Expected JsonObject, got $it")
is JsonNull, null -> DataFrame.emptyOf<AnyKeyValueProperty>()
else -> error("Expected JsonObject, got $record")
}
}

Expand Down Expand Up @@ -488,10 +512,22 @@ internal fun fromJsonListArrayAndValueColumns(
records.forEachIndexed { i, v ->
when (v) {
is JsonObject -> collector.add(null)
is JsonArray<*> -> collector.add(null)
"NaN" -> {
nanIndices.add(i)
collector.add(null)
is JsonArray -> collector.add(null)
is JsonNull -> collector.add(null)
is JsonPrimitive -> {
when {
v.content == "NaN" -> {
nanIndices.add(i)
collector.add(null)
}

v.isString -> collector.add(v.content)
v.booleanOrNull != null -> collector.add(v.boolean)
v.intOrNull != null -> collector.add(v.int)
v.longOrNull != null -> collector.add(v.long)
v.doubleOrNull != null -> collector.add(v.double)
v.floatOrNull != null -> collector.add(v.float)
}
}

else -> collector.add(v)
Expand Down Expand Up @@ -526,7 +562,7 @@ internal fun fromJsonListArrayAndValueColumns(
val startIndices = ArrayList<Int>()
records.forEach {
startIndices.add(values.size)
if (it is JsonArray<*>) values.addAll(it.value)
if (it is JsonArray) values.addAll(it.jsonArray)
}
val parsed = fromJsonListArrayAndValueColumns(
records = values,
Expand All @@ -538,10 +574,11 @@ internal fun fromJsonListArrayAndValueColumns(
parsed.isSingleUnnamedColumn() -> {
val col = (parsed.getColumn(0) as UnnamedColumn).col
val elementType = col.type
val values = col.values.asList().splitByIndices(startIndices.asSequence()).toList()
val columnValues =
col.values.asList().splitByIndices(startIndices.asSequence()).toList()
DataColumn.createValueColumn(
name = colName,
values = values,
values = columnValues,
type = List::class.createType(listOf(KTypeProjection.invariant(elementType))),
)
}
Expand Down
Loading