From 63d6f8b13d38cfde8e76c5bbd4c5930c657f5e1c Mon Sep 17 00:00:00 2001 From: devcrocod Date: Fri, 23 Feb 2024 19:51:34 +0100 Subject: [PATCH 01/10] Replace Klaxon with kotlinx-serialization in JSON operations --- build.gradle.kts | 4 +- core/build.gradle.kts | 4 +- .../jetbrains/kotlinx/dataframe/io/json.kt | 227 ++++++++++-------- .../dataframe/jupyter/JupyterHtmlRenderer.kt | 18 +- .../jetbrains/kotlinx/dataframe/io/json.kt | 3 +- .../dataframe/jupyter/RenderingTests.kt | 43 ++-- gradle/libs.versions.toml | 7 +- .../dataframe-gradle-plugin/build.gradle.kts | 3 +- .../dataframe/gradle/DataFrameReadTest.kt | 6 +- .../kotlinx/dataframe/samples/api/Write.kt | 20 +- 10 files changed, 178 insertions(+), 157 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 956189647c..21af11575a 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -14,7 +14,7 @@ plugins { with(libs.plugins) { alias(kotlin.jvm) alias(publisher) - alias(serialization) + alias(serialization) apply false alias(jupyter.api) apply false alias(dokka) alias(kover) @@ -71,7 +71,7 @@ private fun String.findVersion(): Version { // these names of outdated dependencies will not show up in the table output val dependencyUpdateExclusions = listOf( // 5.6 requires Java 11 - libs.klaxon.get().name, +// libs.serialization.get().name, // TODO Requires more work to be updated to 1.7.0+, https://github.com/Kotlin/dataframe/issues/594 libs.plugins.kover.get().pluginId, // TODO Updating requires major changes all across the project, https://github.com/Kotlin/dataframe/issues/364 diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 196eaba1cb..fb045c3d26 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -66,7 +66,9 @@ dependencies { implementation(libs.kotlin.stdlib.jdk8) api(libs.commonsCsv) - implementation(libs.klaxon) + implementation(libs.serialization.core) + implementation(libs.serialization.json) + implementation(libs.fuel) api(libs.kotlin.datetimeJvm) diff --git 
a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index b464ccdb5e..6c6e10095b 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -1,60 +1,26 @@ +@file:OptIn(ExperimentalSerializationApi::class) + package org.jetbrains.kotlinx.dataframe.io -import com.beust.klaxon.JsonArray -import com.beust.klaxon.JsonObject -import com.beust.klaxon.KlaxonJson -import com.beust.klaxon.Parser -import com.beust.klaxon.json -import org.jetbrains.kotlinx.dataframe.AnyCol -import org.jetbrains.kotlinx.dataframe.AnyFrame -import org.jetbrains.kotlinx.dataframe.AnyRow -import org.jetbrains.kotlinx.dataframe.ColumnsContainer -import org.jetbrains.kotlinx.dataframe.DataColumn -import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.DataRow -import org.jetbrains.kotlinx.dataframe.api.JsonPath -import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty -import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.columnOf -import org.jetbrains.kotlinx.dataframe.api.concat -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.firstOrNull -import org.jetbrains.kotlinx.dataframe.api.getColumn -import org.jetbrains.kotlinx.dataframe.api.indices -import org.jetbrains.kotlinx.dataframe.api.isList -import org.jetbrains.kotlinx.dataframe.api.mapIndexed +import kotlinx.serialization.ExperimentalSerializationApi +import kotlinx.serialization.json.* +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.api.* import org.jetbrains.kotlinx.dataframe.api.name -import org.jetbrains.kotlinx.dataframe.api.named -import org.jetbrains.kotlinx.dataframe.api.rows -import org.jetbrains.kotlinx.dataframe.api.schema -import org.jetbrains.kotlinx.dataframe.api.single -import 
org.jetbrains.kotlinx.dataframe.api.splitInto -import org.jetbrains.kotlinx.dataframe.api.toDataFrame import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadJsonMethod import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.columns.FrameColumn -import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator -import org.jetbrains.kotlinx.dataframe.impl.DataCollectorBase -import org.jetbrains.kotlinx.dataframe.impl.asList +import org.jetbrains.kotlinx.dataframe.impl.* import org.jetbrains.kotlinx.dataframe.impl.columns.createColumn -import org.jetbrains.kotlinx.dataframe.impl.commonType -import org.jetbrains.kotlinx.dataframe.impl.createDataCollector -import org.jetbrains.kotlinx.dataframe.impl.guessValueType import org.jetbrains.kotlinx.dataframe.impl.schema.DataFrameSchemaImpl import org.jetbrains.kotlinx.dataframe.impl.schema.extractSchema import org.jetbrains.kotlinx.dataframe.impl.schema.intersectSchemas -import org.jetbrains.kotlinx.dataframe.impl.splitByIndices import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS -import org.jetbrains.kotlinx.dataframe.ncol -import org.jetbrains.kotlinx.dataframe.nrow import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema -import org.jetbrains.kotlinx.dataframe.type -import org.jetbrains.kotlinx.dataframe.typeClass -import org.jetbrains.kotlinx.dataframe.values import java.io.File import java.io.InputStream import java.net.URL @@ -243,12 +209,13 @@ public fun DataRow.Companion.readJson( * @param header Optional list of column names. If given, [stream] will be read like an object with [header] being the keys. * @return [DataFrame] from the given [stream]. 
*/ +@OptIn(ExperimentalSerializationApi::class) public fun DataFrame.Companion.readJson( stream: InputStream, header: List = emptyList(), keyValuePaths: List = emptyList(), typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyFrame = readJson(Parser.default().parse(stream), header, keyValuePaths, typeClashTactic) +): AnyFrame = readJson(Json.decodeFromStream(stream), header, keyValuePaths, typeClashTactic) /** * @param stream Json as [InputStream] to be converted to a [DataRow]. @@ -278,7 +245,7 @@ public fun DataFrame.Companion.readJsonStr( header: List = emptyList(), keyValuePaths: List = emptyList(), typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyFrame = readJson(Parser.default().parse(StringBuilder(text)), header, keyValuePaths, typeClashTactic) +): AnyFrame = readJson(Json.parseToJsonElement(text), header, keyValuePaths, typeClashTactic) /** * @param text Json as [String] to be converted to a [DataRow]. @@ -304,8 +271,8 @@ private fun readJson( val df: AnyFrame = when (typeClashTactic) { ARRAY_AND_VALUE_COLUMNS -> { when (parsed) { - is JsonArray<*> -> fromJsonListArrayAndValueColumns( - records = parsed.value, + is JsonArray -> fromJsonListArrayAndValueColumns( + records = parsed, header = header, keyValuePaths = keyValuePaths, ) @@ -319,8 +286,8 @@ private fun readJson( ANY_COLUMNS -> { when (parsed) { - is JsonArray<*> -> fromJsonListAnyColumns( - records = parsed.value, + is JsonArray -> fromJsonListAnyColumns( + records = parsed, header = header, keyValuePaths = keyValuePaths, ) @@ -381,9 +348,9 @@ internal fun fromJsonListAnyColumns( } } - is JsonArray<*> -> hasArray = true - null -> Unit - else -> hasPrimitive = true + is JsonArray -> hasArray = true + is JsonNull, null -> Unit + is JsonPrimitive -> hasPrimitive = true } } @@ -423,7 +390,7 @@ internal fun fromJsonListAnyColumns( ) } - is JsonArray<*> -> { + is JsonArray -> { val parsed = fromJsonListAnyColumns( records = v, keyValuePaths = keyValuePaths, @@ -435,9 
+402,21 @@ internal fun fromJsonListAnyColumns( ) } - "NaN" -> { - nanIndices.add(i) - collector.add(null) + is JsonPrimitive -> { + when { + v.content == "NaN" -> { + nanIndices.add(i) + collector.add(null) + } + + v.isString -> collector.add(v.content) + v.booleanOrNull != null -> collector.add(v.boolean) + v.intOrNull != null -> collector.add(v.int) + v.longOrNull != null -> collector.add(v.long) + v.doubleOrNull != null -> collector.add(v.double) + // v.floatOrNull != null -> collector.add(v.float) + v.jsonPrimitive is JsonNull -> collector.add(null) + } } else -> collector.add(v) @@ -473,8 +452,8 @@ internal fun fromJsonListAnyColumns( records.forEach { startIndices.add(values.size) when (it) { - is JsonArray<*> -> values.addAll(it.value) - null -> Unit + is JsonArray -> values.addAll(it) + is JsonNull, null -> Unit else -> error("Expected JsonArray, got $it") } } @@ -512,7 +491,7 @@ internal fun fromJsonListAnyColumns( val dataFrames = records.map { when (it) { is JsonObject -> { - val map = it.map.mapValues { (key, value) -> + val map = it.mapValues { (key, value) -> val parsed = fromJsonListAnyColumns( records = listOf(value), keyValuePaths = keyValuePaths, @@ -534,7 +513,7 @@ internal fun fromJsonListAnyColumns( ) } - null -> DataFrame.emptyOf() + is JsonNull, null -> DataFrame.emptyOf() else -> error("Expected JsonObject, got $it") } } @@ -574,7 +553,7 @@ internal fun fromJsonListAnyColumns( records.forEach { when (it) { is JsonObject -> values.add(it[colName]) - null -> values.add(null) + is JsonNull, null -> values.add(null) else -> error("Expected JsonObject, got $it") } } @@ -656,12 +635,12 @@ internal fun fromJsonListArrayAndValueColumns( nameGenerator.addIfAbsent(it.key) } - is JsonArray<*> -> hasArray = true - null -> Unit - else -> hasPrimitive = true + is JsonArray -> hasArray = true + is JsonNull, null -> Unit + is JsonPrimitive -> hasPrimitive = true } } - if (records.all { it == null }) hasPrimitive = true + if (records.all { it == null || it 
is JsonNull }) hasPrimitive = true // Add a value column to the collected names if needed val valueColumn = if (hasPrimitive || records.isEmpty()) { @@ -685,7 +664,7 @@ internal fun fromJsonListArrayAndValueColumns( val dataFrames = records.map { when (it) { is JsonObject -> { - val map = it.map.mapValues { (key, value) -> + val map = it.mapValues { (key, value) -> val parsed = fromJsonListArrayAndValueColumns( records = listOf(value), keyValuePaths = keyValuePaths, @@ -708,7 +687,7 @@ internal fun fromJsonListArrayAndValueColumns( ) } - null -> DataFrame.emptyOf() + is JsonNull, null -> DataFrame.emptyOf() else -> error("Expected JsonObject, got $it") } } @@ -737,10 +716,23 @@ internal fun fromJsonListArrayAndValueColumns( records.forEachIndexed { i, v -> when (v) { is JsonObject -> collector.add(null) - is JsonArray<*> -> collector.add(null) - "NaN" -> { - nanIndices.add(i) - collector.add(null) + is JsonArray -> collector.add(null) + is JsonPrimitive -> { + when { + v.content == "NaN" -> { + nanIndices.add(i) + collector.add(null) + } + + v.isString -> collector.add(v.content) + v.booleanOrNull != null -> collector.add(v.boolean) + v.intOrNull != null -> collector.add(v.int) + v.longOrNull != null -> collector.add(v.long) + // v.floatOrNull != null -> collector.add(v.float) + v.doubleOrNull != null -> collector.add(v.double) + v is JsonNull -> collector.add(null) + else -> collector.add(v) + } } else -> collector.add(v) @@ -775,7 +767,7 @@ internal fun fromJsonListArrayAndValueColumns( val startIndices = ArrayList() records.forEach { startIndices.add(values.size) - if (it is JsonArray<*>) values.addAll(it.value) + if (it is JsonArray) values.addAll(it.jsonArray) } val parsed = fromJsonListArrayAndValueColumns( records = values, @@ -857,30 +849,46 @@ private class UnnamedColumn(val col: DataColumn) : DataColumn by col private val valueTypes = setOf(Boolean::class, Double::class, Int::class, Float::class, Long::class, Short::class, Byte::class) -internal fun 
KlaxonJson.encodeRow(frame: ColumnsContainer<*>, index: Int): JsonObject? { - val values = frame.columns().map { col -> - when { +@OptIn(ExperimentalSerializationApi::class) +private fun convert(value: Any?): JsonElement = when (value) { + is JsonElement -> value + is Double -> JsonPrimitive(value) + is Float -> JsonPrimitive(value.toDouble()) // It is necessary + // because kotlinx-serialization accurately handles Float -> Float, + // unlike klaxon. + is Number -> JsonPrimitive(value) + is String -> JsonPrimitive(value) + is Char -> JsonPrimitive(value.toString()) + is Boolean -> JsonPrimitive(value) + null -> JsonPrimitive(null) + else -> JsonPrimitive(value.toString()) +} + +internal fun encodeRow(frame: ColumnsContainer<*>, index: Int): JsonObject { + val values: Map = frame.columns().associate { col -> + col.name to when { col is ColumnGroup<*> -> encodeRow(col, index) col is FrameColumn<*> -> encodeFrame(col[index]) col.isList() -> { - col[index]?.let { array(it as List<*>) } ?: array() + col[index]?.let { + JsonArray((it as List<*>).map { value -> convert(value) }) + } ?: JsonPrimitive(null) } col.typeClass in valueTypes -> { val v = col[index] - if ((v is Double && v.isNaN()) || (v is Float && v.isNaN())) { - v.toString() - } else v + convert(v) } - else -> col[index]?.toString() - }.let { col.name to it } + else -> JsonPrimitive(col[index]?.toString()) + } } - if (values.isEmpty()) return null - return obj(values) + + if (values.isEmpty()) return buildJsonObject { } + return JsonObject(values) } -internal fun KlaxonJson.encodeFrame(frame: AnyFrame): JsonArray<*> { +internal fun encodeFrame(frame: AnyFrame): JsonArray { val allColumns = frame.columns() // if there is only 1 column, then `isValidValueColumn` always true. 
@@ -907,7 +915,7 @@ internal fun KlaxonJson.encodeFrame(frame: AnyFrame): JsonArray<*> { } } - val arrayColumn = frame.columns().filter { it.name.startsWith(arrayColumnName) } + val arrayColumn = allColumns.filter { it.name.startsWith(arrayColumnName) } .takeIf { isPossibleToFindUnnamedColumns } ?.maxByOrNull { it.name }?.let { arrayCol -> if (arrayCol.kind() == ColumnKind.Group) null @@ -930,42 +938,51 @@ internal fun KlaxonJson.encodeFrame(frame: AnyFrame): JsonArray<*> { val data = frame.indices().map { rowIndex -> valueColumn?.get(rowIndex) ?: arrayColumn?.get(rowIndex) - ?.let { if (arraysAreFrames) encodeFrame(it as AnyFrame) else null } ?: encodeRow(frame, rowIndex) + ?.let { if (arraysAreFrames) encodeFrame(it as AnyFrame) else null } ?: encodeRow( + frame, + rowIndex + ) } - return array(data) + return buildJsonArray { addAll(data.map { convert(it) }) } } -public fun AnyFrame.toJson(prettyPrint: Boolean = false, canonical: Boolean = false): String { - return json { - encodeFrame(this@toJson) - }.toJsonString(prettyPrint, canonical) +public fun AnyFrame.toJson(prettyPrint: Boolean = false): String { + val json = Json { + this.prettyPrint = prettyPrint + isLenient = true + allowSpecialFloatingPointValues = true + } + return json.encodeToString(JsonElement.serializer(), encodeFrame(this@toJson)) } -public fun AnyRow.toJson(prettyPrint: Boolean = false, canonical: Boolean = false): String { - return json { - encodeRow(df(), index()) - }?.toJsonString(prettyPrint, canonical) ?: "" +public fun AnyRow.toJson(prettyPrint: Boolean = false): String { + val json = Json { + this.prettyPrint = prettyPrint + isLenient = true + allowSpecialFloatingPointValues = true + } + return json.encodeToString(JsonElement.serializer(), encodeRow(df(), index())) } -public fun AnyFrame.writeJson(file: File, prettyPrint: Boolean = false, canonical: Boolean = false) { - file.writeText(toJson(prettyPrint, canonical)) +public fun AnyFrame.writeJson(file: File, prettyPrint: Boolean = 
false) { + file.writeText(toJson(prettyPrint)) } -public fun AnyFrame.writeJson(path: String, prettyPrint: Boolean = false, canonical: Boolean = false): Unit = - writeJson(File(path), prettyPrint, canonical) +public fun AnyFrame.writeJson(path: String, prettyPrint: Boolean = false): Unit = + writeJson(File(path), prettyPrint) -public fun AnyFrame.writeJson(writer: Appendable, prettyPrint: Boolean = false, canonical: Boolean = false) { - writer.append(toJson(prettyPrint, canonical)) +public fun AnyFrame.writeJson(writer: Appendable, prettyPrint: Boolean = false) { + writer.append(toJson(prettyPrint)) } -public fun AnyRow.writeJson(file: File, prettyPrint: Boolean = false, canonical: Boolean = false) { - file.writeText(toJson(prettyPrint, canonical)) +public fun AnyRow.writeJson(file: File, prettyPrint: Boolean = false) { + file.writeText(toJson(prettyPrint)) } -public fun AnyRow.writeJson(path: String, prettyPrint: Boolean = false, canonical: Boolean = false) { - writeJson(File(path), prettyPrint, canonical) +public fun AnyRow.writeJson(path: String, prettyPrint: Boolean = false) { + writeJson(File(path), prettyPrint) } -public fun AnyRow.writeJson(writer: Appendable, prettyPrint: Boolean = false, canonical: Boolean = false) { - writer.append(toJson(prettyPrint, canonical)) +public fun AnyRow.writeJson(writer: Appendable, prettyPrint: Boolean = false) { + writer.append(toJson(prettyPrint)) } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt index 1486448cd1..2f8c491497 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt @@ -1,6 +1,7 @@ package org.jetbrains.kotlinx.dataframe.jupyter -import com.beust.klaxon.json +import kotlinx.serialization.ExperimentalSerializationApi +import 
kotlinx.serialization.json.* import org.jetbrains.kotlinx.dataframe.api.rows import org.jetbrains.kotlinx.dataframe.api.toDataFrame import org.jetbrains.kotlinx.dataframe.io.DataFrameHtmlData @@ -28,6 +29,7 @@ internal class JupyterHtmlRenderer( val builder: JupyterIntegration.Builder, ) +@OptIn(ExperimentalSerializationApi::class) internal inline fun JupyterHtmlRenderer.render( noinline getFooter: (T) -> String, crossinline modifyConfig: T.(DisplayConfiguration) -> DisplayConfiguration = { it }, @@ -60,14 +62,12 @@ internal inline fun JupyterHtmlRenderer.render( val staticHtml = df.toStaticHtml(reifiedDisplayConfiguration, DefaultCellRenderer).toJupyterHtmlData() if (notebook.kernelVersion >= KotlinKernelVersion.from(MIN_KERNEL_VERSION_FOR_NEW_TABLES_UI)!!) { - val jsonEncodedDf = json { - obj( - "nrow" to df.size.nrow, - "ncol" to df.size.ncol, - "columns" to df.columnNames(), - "kotlin_dataframe" to encodeFrame(df.rows().take(limit).toDataFrame()), - ) - }.toJsonString() + val jsonEncodedDf = buildJsonObject { + put("nrow", df.size.nrow) + put("ncol", df.size.ncol) + putJsonArray("columns") { addAll(df.columnNames()) } + put("kotlin_dataframe", encodeFrame(df.rows().take(limit).toDataFrame()),) + }.toString() notebook.renderAsIFrameAsNeeded(html, staticHtml, jsonEncodedDf) } else { notebook.renderHtmlAsIFrameIfNeeded(html) diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index f02d0060f3..cab91e668c 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -949,6 +949,7 @@ class JsonTests { @Test fun `nulls in columns should be encoded explicitly`() { val df = dataFrameOf("a", "b")("1", null, "2", 12) - df.toJson(canonical = true) shouldContain "\"b\":null" + df.toJson() shouldContain "\"b\":null" +// df.toJson(canonical = true) shouldContain "\"b\":null" } } diff --git 
a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt index 8f48073e73..98e1c9b207 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt @@ -1,14 +1,12 @@ package org.jetbrains.kotlinx.dataframe.jupyter -import com.beust.klaxon.JsonArray -import com.beust.klaxon.JsonObject -import com.beust.klaxon.Parser import io.kotest.assertions.throwables.shouldNotThrow import io.kotest.matchers.comparables.shouldBeGreaterThan import io.kotest.matchers.comparables.shouldBeLessThan import io.kotest.matchers.shouldBe import io.kotest.matchers.string.shouldContain import io.kotest.matchers.string.shouldNotContain +import kotlinx.serialization.json.* import org.intellij.lang.annotations.Language import org.jetbrains.kotlinx.jupyter.api.MimeTypedResult import org.jetbrains.kotlinx.jupyter.testkit.JupyterReplTestCase @@ -94,9 +92,9 @@ class RenderingTests : JupyterReplTestCase() { assertDataFrameDimensions(json, 30, 1) - val rows = json.array>("kotlin_dataframe")!! 
- rows.getObj(0).int("id") shouldBe 21 - rows.getObj(rows.lastIndex).int("id") shouldBe 50 + val rows = json["kotlin_dataframe"]!!.jsonArray + rows.getObj(0)["id"]?.jsonPrimitive?.int shouldBe 21 + rows.getObj(rows.lastIndex)["id"]?.jsonPrimitive?.int shouldBe 50 } /** @@ -111,16 +109,15 @@ class RenderingTests : JupyterReplTestCase() { } private fun assertDataFrameDimensions(json: JsonObject, expectedRows: Int, expectedColumns: Int) { - json.int("nrow") shouldBe expectedRows - json.int("ncol") shouldBe expectedColumns + json["nrow"]?.jsonPrimitive?.int shouldBe expectedRows + json["ncol"]?.jsonPrimitive?.int shouldBe expectedColumns } private fun parseDataframeJson(result: MimeTypedResult): JsonObject { - val parser = Parser.default() - return parser.parse(StringBuilder(result["application/kotlindataframe+json"]!!)) as JsonObject + return Json.decodeFromString(result["application/kotlindataframe+json"]!!) } - private fun JsonArray<*>.getObj(index: Int) = this.get(index) as JsonObject + private fun JsonArray.getObj(index: Int) = this[index] as JsonObject @Test fun `test kotlin notebook plugin utils sort by one column asc`() { @@ -138,10 +135,10 @@ class RenderingTests : JupyterReplTestCase() { @Suppress("UNCHECKED_CAST") private fun assertSortedById(json: JsonObject, desc: Boolean) { - val rows = json["kotlin_dataframe"] as JsonArray + val rows = json["kotlin_dataframe"]!!.jsonArray as List var previousId = if (desc) 101 else 0 - rows.forEach { row -> - val currentId = row.int("id")!! 
+ rows.forEach { row: JsonObject -> + val currentId = row["id"]!!.jsonPrimitive.int if (desc) currentId shouldBeLessThan previousId else currentId shouldBeGreaterThan previousId previousId = currentId } @@ -177,25 +174,25 @@ class RenderingTests : JupyterReplTestCase() { assertDataFrameDimensions(json, 100, 2) - val rows = json["kotlin_dataframe"] as JsonArray + val rows = json["kotlin_dataframe"]!!.jsonArray as List assertSortedByCategory(rows) assertSortedById(rows) } - private fun assertSortedByCategory(rows: JsonArray) { + private fun assertSortedByCategory(rows: List) { rows.forEachIndexed { i, row -> - val currentCategory = row.string("category") + val currentCategory = row["category"]!!.jsonPrimitive.content if (i < 50) currentCategory shouldBe "odd" else currentCategory shouldBe "even" } } - private fun assertSortedById(rows: JsonArray) { + private fun assertSortedById(rows: List) { var previousCategory = "odd" var previousId = 0 for (row in rows) { - val currentCategory = row.string("category")!! - val currentId = row.int("id")!! + val currentCategory = row["category"]!!.jsonPrimitive.content + val currentId = row["id"]!!.jsonPrimitive.int if (previousCategory == "odd" && currentCategory == "even") { previousId shouldBeGreaterThan currentId @@ -220,9 +217,9 @@ class RenderingTests : JupyterReplTestCase() { assertDataFrameDimensions(json, 2, 2) - val rows = json.array>("kotlin_dataframe")!! 
- rows.getObj(0).array("group1")!!.size shouldBe 50 - rows.getObj(1).array("group1")!!.size shouldBe 50 + val rows = json["kotlin_dataframe"]!!.jsonArray + rows.getObj(0).get("group1")!!.jsonArray.size shouldBe 50 + rows.getObj(1).get("group1")!!.jsonArray.size shouldBe 50 } // Regression KTNB-424 diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 897492bf7f..c2e4cd6805 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -26,7 +26,7 @@ kover = "0.6.1" commonsCsv = "1.10.0" commonsCompress = "1.26.0" -klaxon = "5.5" # 5.6 requires Java 11 +serialization = "1.6.2" fuel = "2.3.1" poi = "5.2.5" mariadb = "3.3.2" @@ -69,7 +69,10 @@ kotlin-reflect = { group = "org.jetbrains.kotlin", name = "kotlin-reflect", vers kotlin-scriptingJvm = { group = "org.jetbrains.kotlin", name = "kotlin-scripting-jvm", version.ref = "kotlin" } commonsCsv = { group = "org.apache.commons", name = "commons-csv", version.ref = "commonsCsv" } commonsCompress = { group = "org.apache.commons", name = "commons-compress", version.ref = "commonsCompress" } -klaxon = { group = "com.beust", name = "klaxon", version.ref = "klaxon" } +# Serialization +serialization-core = { group = "org.jetbrains.kotlinx", name = "kotlinx-serialization-core", version.ref = "serialization" } +serialization-json = { group = "org.jetbrains.kotlinx", name = "kotlinx-serialization-json", version.ref = "serialization" } + fuel = { group = "com.github.kittinunf.fuel", name = "fuel", version.ref = "fuel" } poi = { group = "org.apache.poi", name = "poi", version.ref = "poi" } mariadb = { group = "org.mariadb.jdbc", name = "mariadb-java-client", version.ref = "mariadb" } diff --git a/plugins/dataframe-gradle-plugin/build.gradle.kts b/plugins/dataframe-gradle-plugin/build.gradle.kts index 846a15ccb5..a134075364 100644 --- a/plugins/dataframe-gradle-plugin/build.gradle.kts +++ b/plugins/dataframe-gradle-plugin/build.gradle.kts @@ -24,7 +24,8 @@ dependencies { 
implementation(libs.kotlin.gradle.plugin.api) implementation(libs.kotlin.gradle.plugin) - implementation(libs.klaxon) + implementation(libs.serialization.core) + implementation(libs.serialization.json) implementation(libs.ksp.gradle) implementation(libs.ksp.api) diff --git a/plugins/dataframe-gradle-plugin/src/test/kotlin/org/jetbrains/dataframe/gradle/DataFrameReadTest.kt b/plugins/dataframe-gradle-plugin/src/test/kotlin/org/jetbrains/dataframe/gradle/DataFrameReadTest.kt index 2cf9c32cac..e709dc2b05 100644 --- a/plugins/dataframe-gradle-plugin/src/test/kotlin/org/jetbrains/dataframe/gradle/DataFrameReadTest.kt +++ b/plugins/dataframe-gradle-plugin/src/test/kotlin/org/jetbrains/dataframe/gradle/DataFrameReadTest.kt @@ -1,11 +1,11 @@ package org.jetbrains.dataframe.gradle -import com.beust.klaxon.KlaxonException import io.kotest.assertions.asClue import io.kotest.assertions.throwables.shouldNotThrowAny import io.kotest.assertions.throwables.shouldThrow import io.kotest.assertions.throwables.shouldThrowAny import io.kotest.matchers.shouldBe +import kotlinx.serialization.SerializationException import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.io.read import org.jetbrains.kotlinx.dataframe.io.readSqlTable @@ -33,7 +33,7 @@ class DataFrameReadTest { fun `file with invalid json`() { val temp = Files.createTempDirectory("").toFile() val invalidJson = File(temp, "test.json").also { it.writeText(".") } - shouldThrow { + shouldThrow { DataFrame.read(invalidJson) } } @@ -74,7 +74,7 @@ class DataFrameReadTest { @Test fun `URL with invalid JSON`() { useHostedJson("") { url -> - shouldThrow { + shouldThrow { DataFrame.read(url).also { println(it) } } } diff --git a/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Write.kt b/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Write.kt index 0157f4d182..28274a96d3 100644 --- a/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Write.kt +++ 
b/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Write.kt @@ -60,16 +60,16 @@ class Write : TestBase() { val jsonStr = df.toJson(prettyPrint = true) // SampleEnd jsonStr shouldStartWith """ - [{ - "name": { - "firstName": "Alice", - "lastName": "Cooper" - }, - "age": 15, - "city": "London", - "weight": 54, - "isHappy": true - } + [ + { + "name": { + "firstName": "Alice", + "lastName": "Cooper" + }, + "age": 15, + "city": "London", + "weight": 54, + "isHappy": true """.rejoinWithSystemLineSeparator() } From 83ba57838c8462278b24c4207c6a4ae7de720712 Mon Sep 17 00:00:00 2001 From: devcrocod Date: Mon, 4 Mar 2024 13:12:26 +0100 Subject: [PATCH 02/10] Remove star imports and fix testing by reading json file --- build.gradle.kts | 2 - .../jetbrains/kotlinx/dataframe/io/json.kt | 59 +++++++++++++++++-- .../dataframe/jupyter/JupyterHtmlRenderer.kt | 5 +- .../dataframe/jupyter/RenderingTests.kt | 7 ++- .../dataframe/gradle/DataFrameReadTest.kt | 2 +- 5 files changed, 66 insertions(+), 9 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 21af11575a..fffbdbb162 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -70,8 +70,6 @@ private fun String.findVersion(): Version { // these names of outdated dependencies will not show up in the table output val dependencyUpdateExclusions = listOf( - // 5.6 requires Java 11 -// libs.serialization.get().name, // TODO Requires more work to be updated to 1.7.0+, https://github.com/Kotlin/dataframe/issues/594 libs.plugins.kover.get().pluginId, // TODO Updating requires major changes all across the project, https://github.com/Kotlin/dataframe/issues/364 diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index 6c6e10095b..bcc6b66d24 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -3,24 +3,75 @@ package 
org.jetbrains.kotlinx.dataframe.io import kotlinx.serialization.ExperimentalSerializationApi -import kotlinx.serialization.json.* -import org.jetbrains.kotlinx.dataframe.* -import org.jetbrains.kotlinx.dataframe.api.* +import kotlinx.serialization.json.Json +import kotlinx.serialization.json.JsonArray +import kotlinx.serialization.json.JsonElement +import kotlinx.serialization.json.JsonNull +import kotlinx.serialization.json.JsonObject +import kotlinx.serialization.json.JsonPrimitive +import kotlinx.serialization.json.boolean +import kotlinx.serialization.json.booleanOrNull +import kotlinx.serialization.json.buildJsonArray +import kotlinx.serialization.json.buildJsonObject +import kotlinx.serialization.json.decodeFromStream +import kotlinx.serialization.json.double +import kotlinx.serialization.json.doubleOrNull +import kotlinx.serialization.json.int +import kotlinx.serialization.json.intOrNull +import kotlinx.serialization.json.jsonArray +import kotlinx.serialization.json.jsonPrimitive +import kotlinx.serialization.json.long +import kotlinx.serialization.json.longOrNull +import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.ColumnsContainer +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.api.JsonPath +import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.concat +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.firstOrNull +import org.jetbrains.kotlinx.dataframe.api.getColumn +import org.jetbrains.kotlinx.dataframe.api.indices +import org.jetbrains.kotlinx.dataframe.api.isList +import 
org.jetbrains.kotlinx.dataframe.api.mapIndexed import org.jetbrains.kotlinx.dataframe.api.name +import org.jetbrains.kotlinx.dataframe.api.named +import org.jetbrains.kotlinx.dataframe.api.rows +import org.jetbrains.kotlinx.dataframe.api.schema +import org.jetbrains.kotlinx.dataframe.api.single +import org.jetbrains.kotlinx.dataframe.api.splitInto +import org.jetbrains.kotlinx.dataframe.api.toDataFrame import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadJsonMethod import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.columns.FrameColumn -import org.jetbrains.kotlinx.dataframe.impl.* +import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator +import org.jetbrains.kotlinx.dataframe.impl.DataCollectorBase +import org.jetbrains.kotlinx.dataframe.impl.asList import org.jetbrains.kotlinx.dataframe.impl.columns.createColumn +import org.jetbrains.kotlinx.dataframe.impl.commonType +import org.jetbrains.kotlinx.dataframe.impl.createDataCollector +import org.jetbrains.kotlinx.dataframe.impl.guessValueType import org.jetbrains.kotlinx.dataframe.impl.schema.DataFrameSchemaImpl import org.jetbrains.kotlinx.dataframe.impl.schema.extractSchema import org.jetbrains.kotlinx.dataframe.impl.schema.intersectSchemas +import org.jetbrains.kotlinx.dataframe.impl.splitByIndices import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS +import org.jetbrains.kotlinx.dataframe.ncol +import org.jetbrains.kotlinx.dataframe.nrow import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema +import org.jetbrains.kotlinx.dataframe.type +import org.jetbrains.kotlinx.dataframe.typeClass +import org.jetbrains.kotlinx.dataframe.values import java.io.File import 
java.io.InputStream import java.net.URL diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt index 2f8c491497..c52af13a57 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt @@ -1,7 +1,10 @@ package org.jetbrains.kotlinx.dataframe.jupyter import kotlinx.serialization.ExperimentalSerializationApi -import kotlinx.serialization.json.* +import kotlinx.serialization.json.addAll +import kotlinx.serialization.json.buildJsonObject +import kotlinx.serialization.json.put +import kotlinx.serialization.json.putJsonArray import org.jetbrains.kotlinx.dataframe.api.rows import org.jetbrains.kotlinx.dataframe.api.toDataFrame import org.jetbrains.kotlinx.dataframe.io.DataFrameHtmlData diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt index 98e1c9b207..f0404d3a39 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt @@ -6,7 +6,12 @@ import io.kotest.matchers.comparables.shouldBeLessThan import io.kotest.matchers.shouldBe import io.kotest.matchers.string.shouldContain import io.kotest.matchers.string.shouldNotContain -import kotlinx.serialization.json.* +import kotlinx.serialization.json.Json +import kotlinx.serialization.json.JsonArray +import kotlinx.serialization.json.JsonObject +import kotlinx.serialization.json.int +import kotlinx.serialization.json.jsonArray +import kotlinx.serialization.json.jsonPrimitive import org.intellij.lang.annotations.Language import org.jetbrains.kotlinx.jupyter.api.MimeTypedResult import org.jetbrains.kotlinx.jupyter.testkit.JupyterReplTestCase diff 
--git a/plugins/dataframe-gradle-plugin/src/test/kotlin/org/jetbrains/dataframe/gradle/DataFrameReadTest.kt b/plugins/dataframe-gradle-plugin/src/test/kotlin/org/jetbrains/dataframe/gradle/DataFrameReadTest.kt index e709dc2b05..e7307aac63 100644 --- a/plugins/dataframe-gradle-plugin/src/test/kotlin/org/jetbrains/dataframe/gradle/DataFrameReadTest.kt +++ b/plugins/dataframe-gradle-plugin/src/test/kotlin/org/jetbrains/dataframe/gradle/DataFrameReadTest.kt @@ -33,7 +33,7 @@ class DataFrameReadTest { fun `file with invalid json`() { val temp = Files.createTempDirectory("").toFile() val invalidJson = File(temp, "test.json").also { it.writeText(".") } - shouldThrow { + shouldNotThrowAny { DataFrame.read(invalidJson) } } From 78441dd446957ca0e4e45d7d86362456bf80e467 Mon Sep 17 00:00:00 2001 From: devcrocod Date: Mon, 4 Mar 2024 13:29:41 +0100 Subject: [PATCH 03/10] Add float support in JSON de/serialization --- .../kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt | 10 ++++------ .../kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt | 10 +++++++--- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index bcc6b66d24..12489b05aa 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -16,6 +16,8 @@ import kotlinx.serialization.json.buildJsonObject import kotlinx.serialization.json.decodeFromStream import kotlinx.serialization.json.double import kotlinx.serialization.json.doubleOrNull +import kotlinx.serialization.json.float +import kotlinx.serialization.json.floatOrNull import kotlinx.serialization.json.int import kotlinx.serialization.json.intOrNull import kotlinx.serialization.json.jsonArray @@ -465,7 +467,7 @@ internal fun fromJsonListAnyColumns( v.intOrNull != null -> collector.add(v.int) v.longOrNull != null -> collector.add(v.long) 
v.doubleOrNull != null -> collector.add(v.double) - // v.floatOrNull != null -> collector.add(v.float) + v.floatOrNull != null -> collector.add(v.float) v.jsonPrimitive is JsonNull -> collector.add(null) } } @@ -779,8 +781,8 @@ internal fun fromJsonListArrayAndValueColumns( v.booleanOrNull != null -> collector.add(v.boolean) v.intOrNull != null -> collector.add(v.int) v.longOrNull != null -> collector.add(v.long) - // v.floatOrNull != null -> collector.add(v.float) v.doubleOrNull != null -> collector.add(v.double) + v.floatOrNull != null -> collector.add(v.float) v is JsonNull -> collector.add(null) else -> collector.add(v) } @@ -903,10 +905,6 @@ private val valueTypes = @OptIn(ExperimentalSerializationApi::class) private fun convert(value: Any?): JsonElement = when (value) { is JsonElement -> value - is Double -> JsonPrimitive(value) - is Float -> JsonPrimitive(value.toDouble()) // It is necessary - // because kotlinx-serialization accurately handles Float -> Float, - // unlike klaxon. 
is Number -> JsonPrimitive(value) is String -> JsonPrimitive(value) is Char -> JsonPrimitive(value.toString()) diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index cab91e668c..0cac17bd47 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -20,7 +20,9 @@ import org.jetbrains.kotlinx.dataframe.api.getColumnGroup import org.jetbrains.kotlinx.dataframe.api.getFrameColumn import org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.api.toDouble +import org.jetbrains.kotlinx.dataframe.api.toFloat import org.jetbrains.kotlinx.dataframe.api.toMap +import org.jetbrains.kotlinx.dataframe.api.with import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.columns.ValueColumn @@ -29,6 +31,7 @@ import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.* import org.jetbrains.kotlinx.dataframe.type import org.jetbrains.kotlinx.dataframe.values import org.junit.Test +import kotlin.math.exp import kotlin.reflect.* class JsonTests { @@ -382,15 +385,16 @@ class JsonTests { fun `NaN float serialization`() { val df = dataFrameOf("v")(1.1f, Float.NaN) df["v"].type() shouldBe typeOf() - DataFrame.readJsonStr(df.toJson()) shouldBe df.convert("v").toDouble() + val actual = DataFrame.readJsonStr(df.toJson()).convert("v").toFloat() + actual shouldBe df } @Test fun `NaN float serialization Any`() { val df = dataFrameOf("v")(1.1f, Float.NaN) df["v"].type() shouldBe typeOf() - DataFrame.readJsonStr(df.toJson(), typeClashTactic = ANY_COLUMNS) shouldBe df.convert("v") - .toDouble() + val actual = DataFrame.readJsonStr(df.toJson(), typeClashTactic = ANY_COLUMNS).convert("v").toFloat() + actual shouldBe df } @Test From d061d13fdebc963b5c801612f32310b2503a7004 Mon Sep 17 00:00:00 
2001 From: devcrocod Date: Fri, 7 Jun 2024 15:51:37 +0200 Subject: [PATCH 04/10] Resolve conflicts after #573 pr --- .../kotlinx/dataframe/impl/io/readJson.kt | 136 ++-- .../kotlinx/dataframe/impl/io/writeJson.kt | 237 +++--- .../jetbrains/kotlinx/dataframe/io/json.kt | 741 +----------------- .../dataframe/jupyter/JupyterHtmlRenderer.kt | 19 +- .../org/jetbrains/kotlinx/dataframe/Utils.kt | 10 +- .../dataframe/io/ImageSerializationTests.kt | 19 +- .../jetbrains/kotlinx/dataframe/io/json.kt | 84 +- .../dataframe/jupyter/RenderingTests.kt | 15 +- .../dataframe/io/DefaultReadOpenApiMethod.kt | 2 + 9 files changed, 312 insertions(+), 951 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt index f1053cda81..51230cc615 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt @@ -1,7 +1,21 @@ package org.jetbrains.kotlinx.dataframe.impl.io -import com.beust.klaxon.JsonArray -import com.beust.klaxon.JsonObject +import kotlinx.serialization.json.JsonArray +import kotlinx.serialization.json.JsonNull +import kotlinx.serialization.json.JsonObject +import kotlinx.serialization.json.JsonPrimitive +import kotlinx.serialization.json.boolean +import kotlinx.serialization.json.booleanOrNull +import kotlinx.serialization.json.double +import kotlinx.serialization.json.doubleOrNull +import kotlinx.serialization.json.float +import kotlinx.serialization.json.floatOrNull +import kotlinx.serialization.json.int +import kotlinx.serialization.json.intOrNull +import kotlinx.serialization.json.jsonArray +import kotlinx.serialization.json.jsonPrimitive +import kotlinx.serialization.json.long +import kotlinx.serialization.json.longOrNull import org.jetbrains.kotlinx.dataframe.AnyCol import org.jetbrains.kotlinx.dataframe.AnyFrame import 
org.jetbrains.kotlinx.dataframe.DataColumn @@ -73,8 +87,8 @@ internal fun readJson( val df: AnyFrame = when (typeClashTactic) { ARRAY_AND_VALUE_COLUMNS -> { when (parsed) { - is JsonArray<*> -> fromJsonListArrayAndValueColumns( - records = parsed.value, + is JsonArray -> fromJsonListArrayAndValueColumns( + records = parsed, header = header, keyValuePaths = keyValuePaths, ) @@ -88,8 +102,8 @@ internal fun readJson( ANY_COLUMNS -> { when (parsed) { - is JsonArray<*> -> fromJsonListAnyColumns( - records = parsed.value, + is JsonArray -> fromJsonListAnyColumns( + records = parsed, header = header, keyValuePaths = keyValuePaths, ) @@ -126,18 +140,16 @@ internal fun fromJsonListAnyColumns( // list element type can be JsonObject, JsonArray or primitive val nameGenerator = ColumnNameGenerator() - records.forEach { - when (it) { + records.forEach { record -> + when (record) { is JsonObject -> { hasObject = true - it.entries.forEach { - nameGenerator.addIfAbsent(it.key) - } + record.entries.forEach { nameGenerator.addIfAbsent(it.key) } } - is JsonArray<*> -> hasArray = true - null -> Unit - else -> hasPrimitive = true + is JsonArray -> hasArray = true + is JsonNull, null -> Unit + is JsonPrimitive -> hasPrimitive = true } } @@ -155,7 +167,7 @@ internal fun fromJsonListAnyColumns( @Suppress("KotlinConstantConditions") val columns: List = when { - // Create one column of type Any? (or guessed primitive type) from all the records + // Create one column of type Any? 
(or guessed a primitive type) from all the records colType == AnyColType.ANY -> { val collector: DataCollectorBase = if (justPrimitives) createDataCollector(records.size) // guess the type @@ -177,7 +189,7 @@ internal fun fromJsonListAnyColumns( ) } - is JsonArray<*> -> { + is JsonArray -> { val parsed = fromJsonListAnyColumns( records = v, keyValuePaths = keyValuePaths, @@ -189,9 +201,21 @@ internal fun fromJsonListAnyColumns( ) } - "NaN" -> { - nanIndices.add(i) - collector.add(null) + is JsonPrimitive -> { + when { + v.content == "NaN" -> { + nanIndices.add(i) + collector.add(null) + } + + v.isString -> collector.add(v.content) + v.booleanOrNull != null -> collector.add(v.boolean) + v.intOrNull != null -> collector.add(v.int) + v.longOrNull != null -> collector.add(v.long) + v.doubleOrNull != null -> collector.add(v.double) + v.floatOrNull != null -> collector.add(v.float) + v.jsonPrimitive is JsonNull -> collector.add(null) + } } else -> collector.add(v) @@ -227,8 +251,8 @@ internal fun fromJsonListAnyColumns( records.forEach { startIndices.add(values.size) when (it) { - is JsonArray<*> -> values.addAll(it.value) - null -> Unit + is JsonArray -> values.addAll(it) + is JsonNull, null -> Unit else -> error("Expected JsonArray, got $it") } } @@ -242,10 +266,10 @@ internal fun fromJsonListAnyColumns( parsed.isSingleUnnamedColumn() -> { val col = (parsed.getColumn(0) as UnnamedColumn).col val elementType = col.type - val values = col.values.asList().splitByIndices(startIndices.asSequence()).toList() + val columnValues = col.values.asList().splitByIndices(startIndices.asSequence()).toList() DataColumn.createValueColumn( name = arrayColumnName, - values = values, + values = columnValues, type = List::class.createType(listOf(KTypeProjection.invariant(elementType))), ) } @@ -263,10 +287,10 @@ internal fun fromJsonListAnyColumns( colType == AnyColType.OBJECTS && isKeyValue -> { // collect the value types to make sure Value columns with lists and other values aren't all 
turned into lists val valueTypes = mutableSetOf() - val dataFrames = records.map { - when (it) { + val dataFrames = records.map { record -> + when (record) { is JsonObject -> { - val map = it.map.mapValues { (key, value) -> + val map = record.mapValues { (key, value) -> val parsed = fromJsonListAnyColumns( records = listOf(value), keyValuePaths = keyValuePaths, @@ -288,8 +312,8 @@ internal fun fromJsonListAnyColumns( ) } - null -> DataFrame.emptyOf() - else -> error("Expected JsonObject, got $it") + is JsonNull, null -> DataFrame.emptyOf() + else -> error("Expected JsonObject, got $record") } } @@ -328,7 +352,7 @@ internal fun fromJsonListAnyColumns( records.forEach { when (it) { is JsonObject -> values.add(it[colName]) - null -> values.add(null) + is JsonNull, null -> values.add(null) else -> error("Expected JsonObject, got $it") } } @@ -395,24 +419,24 @@ internal fun fromJsonListArrayAndValueColumns( // list element type can be JsonObject, JsonArray or primitive // So first, we gather all properties of objects to merge including "array" and "value" if needed - // so the resulting type of a property with instances 123, ["abc"], and { "a": 1, "b": 2 } will be + // so the resulting type of property with instances 123, ["abc"], and { "a": 1, "b": 2 } will be // { array: List, value: Int?, a: Int?, b: Int? 
} // and instances will look like // { "array": [], "value": 123, "a": null, "b": null } val nameGenerator = ColumnNameGenerator() - records.forEach { - when (it) { - is JsonObject -> it.entries.forEach { + records.forEach { record -> + when (record) { + is JsonObject -> record.entries.forEach { nameGenerator.addIfAbsent(it.key) } - is JsonArray<*> -> hasArray = true - null -> Unit - else -> hasPrimitive = true + is JsonArray -> hasArray = true + is JsonNull, null -> Unit + is JsonPrimitive -> hasPrimitive = true } } - if (records.all { it == null }) hasPrimitive = true + if (records.all { it == null || it is JsonNull }) hasPrimitive = true // Add a value column to the collected names if needed val valueColumn = if (hasPrimitive || records.isEmpty()) { @@ -433,10 +457,10 @@ internal fun fromJsonListArrayAndValueColumns( val columns: List = when { // instead of using the names, generate a single key/value frame column isKeyValue -> { - val dataFrames = records.map { - when (it) { + val dataFrames = records.map { record -> + when (record) { is JsonObject -> { - val map = it.map.mapValues { (key, value) -> + val map = record.mapValues { (key, value) -> val parsed = fromJsonListArrayAndValueColumns( records = listOf(value), keyValuePaths = keyValuePaths, @@ -459,8 +483,8 @@ internal fun fromJsonListArrayAndValueColumns( ) } - null -> DataFrame.emptyOf() - else -> error("Expected JsonObject, got $it") + is JsonNull, null -> DataFrame.emptyOf() + else -> error("Expected JsonObject, got $record") } } @@ -488,10 +512,23 @@ internal fun fromJsonListArrayAndValueColumns( records.forEachIndexed { i, v -> when (v) { is JsonObject -> collector.add(null) - is JsonArray<*> -> collector.add(null) - "NaN" -> { - nanIndices.add(i) - collector.add(null) + is JsonArray -> collector.add(null) + is JsonPrimitive -> { + when { + v.content == "NaN" -> { + nanIndices.add(i) + collector.add(null) + } + + v.isString -> collector.add(v.content) + v.booleanOrNull != null -> 
collector.add(v.boolean) + v.intOrNull != null -> collector.add(v.int) + v.longOrNull != null -> collector.add(v.long) + v.doubleOrNull != null -> collector.add(v.double) + v.floatOrNull != null -> collector.add(v.float) + v is JsonNull -> collector.add(null) + else -> collector.add(v) + } } else -> collector.add(v) @@ -526,7 +563,7 @@ internal fun fromJsonListArrayAndValueColumns( val startIndices = ArrayList() records.forEach { startIndices.add(values.size) - if (it is JsonArray<*>) values.addAll(it.value) + if (it is JsonArray) values.addAll(it.jsonArray) } val parsed = fromJsonListArrayAndValueColumns( records = values, @@ -538,10 +575,11 @@ internal fun fromJsonListArrayAndValueColumns( parsed.isSingleUnnamedColumn() -> { val col = (parsed.getColumn(0) as UnnamedColumn).col val elementType = col.type - val values = col.values.asList().splitByIndices(startIndices.asSequence()).toList() + val columnValues = + col.values.asList().splitByIndices(startIndices.asSequence()).toList() DataColumn.createValueColumn( name = colName, - values = values, + values = columnValues, type = List::class.createType(listOf(KTypeProjection.invariant(elementType))), ) } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt index 2bff506bcd..c5d38f922d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt @@ -1,8 +1,18 @@ +@file:OptIn(ExperimentalSerializationApi::class) + package org.jetbrains.kotlinx.dataframe.impl.io -import com.beust.klaxon.JsonArray -import com.beust.klaxon.JsonObject -import com.beust.klaxon.KlaxonJson +import kotlinx.serialization.ExperimentalSerializationApi +import kotlinx.serialization.json.Json +import kotlinx.serialization.json.JsonArray +import kotlinx.serialization.json.JsonElement +import kotlinx.serialization.json.JsonObject +import 
kotlinx.serialization.json.JsonPrimitive +import kotlinx.serialization.json.buildJsonArray +import kotlinx.serialization.json.buildJsonObject +import kotlinx.serialization.json.encodeToJsonElement +import kotlinx.serialization.json.putJsonArray +import kotlinx.serialization.json.putJsonObject import org.jetbrains.kotlinx.dataframe.AnyCol import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.ColumnsContainer @@ -36,17 +46,9 @@ import org.jetbrains.kotlinx.dataframe.typeClass import java.awt.image.BufferedImage import java.io.IOException -internal fun KlaxonJson.encodeRow(frame: ColumnsContainer<*>, index: Int): JsonObject? { - val values = frame.columns().map { col -> - when (col) { - is ColumnGroup<*> -> encodeRow(col, index) - is FrameColumn<*> -> encodeFrame(col[index]) - else -> encodeValue(col, index) - }.let { col.name to it } - } - if (values.isEmpty()) return null - return obj(values) -} +// See docs/serialization_format.md for a description of +// serialization versions and format. +internal const val SERIALIZATION_VERSION = "2.1.0" internal object SerializationKeys { const val DATA = "data" @@ -61,31 +63,70 @@ internal object SerializationKeys { const val TYPES = "types" } -// See docs/serialization_format.md for a description of -// serialization versions and format. 
-internal const val SERIALIZATION_VERSION = "2.1.0" +private val valueTypes = + setOf(Boolean::class, Double::class, Int::class, Float::class, Long::class, Short::class, Byte::class) + +@OptIn(ExperimentalSerializationApi::class) +private fun convert(value: Any?): JsonElement = when (value) { + is JsonElement -> value + is Number -> JsonPrimitive(value) + is String -> JsonPrimitive(value) + is Char -> JsonPrimitive(value.toString()) + is Boolean -> JsonPrimitive(value) + null -> JsonPrimitive(null) + else -> JsonPrimitive(value.toString()) +} + +internal fun encodeRow(frame: ColumnsContainer<*>, index: Int): JsonObject { + val values: Map = frame.columns().associate { col -> + col.name to when { + col is ColumnGroup<*> -> encodeRow(col, index) + col is FrameColumn<*> -> encodeFrame(col[index]) + col.isList() -> { + col[index]?.let { + JsonArray((it as List<*>).map { value -> convert(value) }) + } ?: JsonPrimitive(null) + } + + col.typeClass in valueTypes -> { + val v = col[index] + convert(v) + } + + else -> JsonPrimitive(col[index]?.toString()) + } + } + + if (values.isEmpty()) return buildJsonObject { } + return JsonObject(values) +} -internal fun KlaxonJson.encodeRowWithMetadata( +internal fun encodeRowWithMetadata( frame: ColumnsContainer<*>, index: Int, rowLimit: Int? = null, imageEncodingOptions: Base64ImageEncodingOptions? = null -): JsonObject? { - val values = frame.columns().map { col -> +): JsonElement? 
{ + val values: List> = frame.columns().map { col -> when (col) { is ColumnGroup<*> -> { val schema = col.schema() - obj( - DATA to encodeRowWithMetadata(col, index, rowLimit, imageEncodingOptions), - METADATA to obj( - KIND to ColumnKind.Group.toString(), - COLUMNS to schema.columns.keys, - TYPES to schema.columns.values.map { columnSchema -> - createJsonTypeDescriptor(columnSchema) + buildJsonObject { + put(DATA, encodeRowWithMetadata(col, index, rowLimit, imageEncodingOptions) ?: JsonPrimitive(null)) + putJsonObject(METADATA) { + put(KIND, JsonPrimitive(ColumnKind.Group.toString())) + put(COLUMNS, Json.encodeToJsonElement(schema.columns.keys)) + putJsonArray(TYPES) { + addAll( + schema.columns.values.map { columnSchema -> + createJsonTypeDescriptor(columnSchema) + } + ) } - ), - ) + } + } } + is FrameColumn<*> -> { val data = if (rowLimit == null) { encodeFrameWithMetadata(col[index], null, imageEncodingOptions) @@ -93,59 +134,50 @@ internal fun KlaxonJson.encodeRowWithMetadata( encodeFrameWithMetadata(col[index].take(rowLimit), rowLimit, imageEncodingOptions) } val schema = col.schema.value - obj( - DATA to data, - METADATA to obj( - KIND to ColumnKind.Frame.toString(), - COLUMNS to schema.columns.keys, - TYPES to schema.columns.values.map { columnSchema -> - createJsonTypeDescriptor(columnSchema) - }, - NCOL to col[index].ncol, - NROW to col[index].nrow - ) - ) + buildJsonObject { + put(DATA, data) + putJsonObject(METADATA) { + put(KIND, JsonPrimitive(ColumnKind.Frame.toString())) + put(COLUMNS, Json.encodeToJsonElement(schema.columns.keys)) + putJsonArray(TYPES) { + addAll( + schema.columns.values.map { columnSchema -> + createJsonTypeDescriptor(columnSchema) + } + ) + } + put(NCOL, JsonPrimitive(col[index].ncol)) + put(NROW, JsonPrimitive(col[index].nrow)) + } + } } + else -> encodeValue(col, index, imageEncodingOptions) }.let { col.name to it } } if (values.isEmpty()) return null - return obj(values) + JsonObject(mapOf("exampleKey" to 
JsonPrimitive("exampleValue"))) + return JsonObject(values.toMap()) } -private val valueTypes = - setOf(Boolean::class, Double::class, Int::class, Float::class, Long::class, Short::class, Byte::class) - -internal fun KlaxonJson.encodeValue( +internal fun encodeValue( col: AnyCol, index: Int, imageEncodingOptions: Base64ImageEncodingOptions? = null -): Any? = when { +): JsonElement = when { col.isList() -> col[index]?.let { list -> - val values = (list as List<*>).map { - when (it) { - null, is Int, is Double, is Float, is Long, is Boolean, is Short, is Byte -> it - // Klaxon default serializers will try to use reflection and can sometimes fail. - // We can't have exceptions in Notebook DataFrame renderer - else -> it.toString() - } - } - array(values) - } ?: array() - - col.typeClass in valueTypes -> { - val v = col[index] - if ((v is Double && v.isNaN()) || (v is Float && v.isNaN())) { - v.toString() - } else v - } + val values = (list as List<*>).map { convert(it) } + JsonArray(values) + } ?: JsonArray(emptyList()) + + col.typeClass in valueTypes -> convert(col[index]) col.typeClass == BufferedImage::class && imageEncodingOptions != null -> col[index]?.let { image -> - encodeBufferedImageAsBase64(image as BufferedImage, imageEncodingOptions) - } ?: "" + JsonPrimitive(encodeBufferedImageAsBase64(image as BufferedImage, imageEncodingOptions)) + } ?: JsonPrimitive("") - else -> col[index]?.toString() + else -> JsonPrimitive(col[index]?.toString()) } private fun encodeBufferedImageAsBase64( @@ -173,19 +205,19 @@ private fun encodeBufferedImageAsBase64( private fun createJsonTypeDescriptor(columnSchema: ColumnSchema): JsonObject { return JsonObject( - mutableMapOf(KIND to columnSchema.kind.toString()).also { + mutableMapOf(KIND to JsonPrimitive(columnSchema.kind.toString())).also { if (columnSchema.kind == ColumnKind.Value) { - it.put(TYPE, columnSchema.type.toString()) + it[TYPE] = JsonPrimitive(columnSchema.type.toString()) } } ) } -internal fun 
KlaxonJson.encodeFrameWithMetadata( +internal fun encodeFrameWithMetadata( frame: AnyFrame, rowLimit: Int? = null, imageEncodingOptions: Base64ImageEncodingOptions? = null -): JsonArray<*> { +): JsonArray { val valueColumn = frame.extractValueColumn() val arrayColumn = frame.extractArrayColumn() @@ -205,7 +237,7 @@ internal fun KlaxonJson.encodeFrameWithMetadata( ?: encodeRowWithMetadata(frame, rowIndex, rowLimit, imageEncodingOptions) } - return array(data) + return buildJsonArray { addAll(data.map { convert(it) }) } } internal fun AnyFrame.extractValueColumn(): DataColumn<*>? { @@ -232,9 +264,9 @@ internal fun AnyFrame.extractValueColumn(): DataColumn<*>? { } } -// if there is only 1 column, then `isValidValueColumn` always true. -// But at the same time, we shouldn't treat dataFrameOf("value")(1,2,3) like unnamed column -// because it was created by user. +// If there is only 1 column, then `isValidValueColumn` always true. +// But at the same time, we shouldn't treat dataFrameOf("value")(1,2,3) like an unnamed column +// because it was created by the user. internal val AnyFrame.isPossibleToFindUnnamedColumns: Boolean get() = columns().size != 1 @@ -261,45 +293,48 @@ internal fun AnyFrame.extractArrayColumn(): DataColumn<*>? 
{ } } -internal fun KlaxonJson.encodeFrame(frame: AnyFrame): JsonArray<*> { +internal fun encodeFrame(frame: AnyFrame): JsonArray { val valueColumn = frame.extractValueColumn() val arrayColumn = frame.extractArrayColumn() val arraysAreFrames = arrayColumn?.kind() == ColumnKind.Frame val data = frame.indices().map { rowIndex -> - valueColumn - ?.get(rowIndex) - ?: arrayColumn?.get(rowIndex) - ?.let { - if (arraysAreFrames) encodeFrame(it as AnyFrame) else null - } - ?: encodeRow(frame, rowIndex) + valueColumn?.get(rowIndex) ?: arrayColumn?.get(rowIndex)?.let { + if (arraysAreFrames) encodeFrame(it as AnyFrame) else null + } ?: encodeRow(frame, rowIndex) } - return array(data) + return buildJsonArray { addAll(data.map { convert(it) }) } } -internal fun KlaxonJson.encodeDataFrameWithMetadata( +internal fun encodeDataFrameWithMetadata( frame: AnyFrame, rowLimit: Int, nestedRowLimit: Int? = null, imageEncodingOptions: Base64ImageEncodingOptions? = null ): JsonObject { - return obj( - VERSION to SERIALIZATION_VERSION, - METADATA to obj( - COLUMNS to frame.columnNames(), - TYPES to frame.schema().columns.values.map { colSchema -> - createJsonTypeDescriptor(colSchema) - }, - NROW to frame.rowsCount(), - NCOL to frame.columnsCount() - ), - KOTLIN_DATAFRAME to encodeFrameWithMetadata( - frame.take(rowLimit), - rowLimit = nestedRowLimit, - imageEncodingOptions - ), - ) + return buildJsonObject { + put(VERSION, JsonPrimitive(SERIALIZATION_VERSION)) + putJsonObject(METADATA) { + putJsonArray(COLUMNS) { addAll(frame.columnNames().map { JsonPrimitive(it) }) } + putJsonArray(TYPES) { + addAll( + frame.schema().columns.values.map { colSchema -> + createJsonTypeDescriptor(colSchema) + } + ) + } + put(NROW, JsonPrimitive(frame.rowsCount())) + put(NCOL, JsonPrimitive(frame.columnsCount())) + } + put( + KOTLIN_DATAFRAME, + encodeFrameWithMetadata( + frame.take(rowLimit), + rowLimit = nestedRowLimit, + imageEncodingOptions + ) + ) + } } diff --git 
a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index 001f235f38..61c864941e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -1,32 +1,9 @@ -@file:OptIn(ExperimentalSerializationApi::class) - package org.jetbrains.kotlinx.dataframe.io import kotlinx.serialization.ExperimentalSerializationApi import kotlinx.serialization.json.Json -import kotlinx.serialization.json.JsonArray import kotlinx.serialization.json.JsonElement -import kotlinx.serialization.json.JsonNull -import kotlinx.serialization.json.JsonObject -import kotlinx.serialization.json.JsonPrimitive -import kotlinx.serialization.json.boolean -import kotlinx.serialization.json.booleanOrNull -import kotlinx.serialization.json.buildJsonArray -import kotlinx.serialization.json.buildJsonObject import kotlinx.serialization.json.decodeFromStream -import kotlinx.serialization.json.double -import kotlinx.serialization.json.doubleOrNull -import kotlinx.serialization.json.float -import kotlinx.serialization.json.floatOrNull -import kotlinx.serialization.json.int -import kotlinx.serialization.json.intOrNull -import kotlinx.serialization.json.jsonArray -import kotlinx.serialization.json.jsonPrimitive -import kotlinx.serialization.json.long -import kotlinx.serialization.json.longOrNull -import org.jetbrains.kotlinx.dataframe.AnyCol -import com.beust.klaxon.Parser -import com.beust.klaxon.json import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.AnyRow import org.jetbrains.kotlinx.dataframe.DataFrame @@ -42,6 +19,8 @@ import org.jetbrains.kotlinx.dataframe.impl.io.encodeDataFrameWithMetadata import org.jetbrains.kotlinx.dataframe.impl.io.encodeFrame import org.jetbrains.kotlinx.dataframe.impl.io.encodeRow import org.jetbrains.kotlinx.dataframe.impl.io.readJson +import 
org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.GZIP_ON +import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.LIMIT_SIZE_ON import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS @@ -286,686 +265,6 @@ public fun DataRow.Companion.readJsonStr( typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, ): AnyRow = DataFrame.readJsonStr(text, header, keyValuePaths, typeClashTactic).single() -private fun readJson( - parsed: Any?, - header: List, - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): DataFrame<*> { - val df: AnyFrame = when (typeClashTactic) { - ARRAY_AND_VALUE_COLUMNS -> { - when (parsed) { - is JsonArray -> fromJsonListArrayAndValueColumns( - records = parsed, - header = header, - keyValuePaths = keyValuePaths, - ) - - else -> fromJsonListArrayAndValueColumns( - records = listOf(parsed), - keyValuePaths = keyValuePaths, - ) - } - } - - ANY_COLUMNS -> { - when (parsed) { - is JsonArray -> fromJsonListAnyColumns( - records = parsed, - header = header, - keyValuePaths = keyValuePaths, - ) - - else -> fromJsonListAnyColumns( - records = listOf(parsed), - keyValuePaths = keyValuePaths, - ) - } - } - } - return df.unwrapUnnamedColumns() -} - -private fun DataFrame.unwrapUnnamedColumns() = - dataFrameOf(columns().map { it.unwrapUnnamedColumn() }) - -private fun AnyCol.unwrapUnnamedColumn() = if (this is UnnamedColumn) col else this - -private enum class AnyColType { - ANY, - ARRAYS, - OBJECTS, -} - -internal interface AnyKeyValueProperty : KeyValueProperty { - override val value: Any? -} - -/** - * Json to DataFrame converter that creates [Any] columns. - * A.k.a. [TypeClashTactic.ANY_COLUMNS]. - * - * @param records List of json elements to be converted to a [DataFrame]. 
- * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]> - * will be created. - * @param header Optional list of column names. If given, [records] will be read like an object with [header] being the keys. - * @return [DataFrame] from the given [records]. - */ -internal fun fromJsonListAnyColumns( - records: List<*>, - keyValuePaths: List = emptyList(), - header: List = emptyList(), - jsonPath: JsonPath = JsonPath(), -): AnyFrame { - var hasPrimitive = false - var hasArray = false - var hasObject = false - - // list element type can be JsonObject, JsonArray or primitive - val nameGenerator = ColumnNameGenerator() - records.forEach { - when (it) { - is JsonObject -> { - hasObject = true - it.entries.forEach { - nameGenerator.addIfAbsent(it.key) - } - } - - is JsonArray -> hasArray = true - is JsonNull, null -> Unit - is JsonPrimitive -> hasPrimitive = true - } - } - - val colType = when { - hasArray && !hasPrimitive && !hasObject -> AnyColType.ARRAYS - hasObject && !hasPrimitive && !hasArray -> AnyColType.OBJECTS - else -> AnyColType.ANY - } - val justPrimitives = hasPrimitive && !hasArray && !hasObject - val isKeyValue = keyValuePaths.any { jsonPath.matches(it) } - - if (isKeyValue && colType != AnyColType.OBJECTS) { - error("Key value path $jsonPath does not match objects.") - } - - @Suppress("KotlinConstantConditions") - val columns: List = when { - // Create one column of type Any? (or guessed primitive type) from all the records - colType == AnyColType.ANY -> { - val collector: DataCollectorBase = - if (justPrimitives) createDataCollector(records.size) // guess the type - else createDataCollector(records.size, typeOf()) // use Any? 
- - val nanIndices = mutableListOf() - records.forEachIndexed { i, v -> - when (v) { - is JsonObject -> { - val parsed = - fromJsonListAnyColumns( - records = listOf(v), - keyValuePaths = keyValuePaths, - jsonPath = jsonPath.replaceLastWildcardWithIndex(i), - ) - collector.add( - if (parsed.isSingleUnnamedColumn()) (parsed.getColumn(0) as UnnamedColumn).col.values.first() - else parsed.firstOrNull() ?: DataRow.empty - ) - } - - is JsonArray -> { - val parsed = fromJsonListAnyColumns( - records = v, - keyValuePaths = keyValuePaths, - jsonPath = jsonPath.replaceLastWildcardWithIndex(i).appendArrayWithWildcard(), - ) - collector.add( - if (parsed.isSingleUnnamedColumn()) (parsed.getColumn(0) as UnnamedColumn).col.values.asList() - else parsed.unwrapUnnamedColumns() - ) - } - - is JsonPrimitive -> { - when { - v.content == "NaN" -> { - nanIndices.add(i) - collector.add(null) - } - - v.isString -> collector.add(v.content) - v.booleanOrNull != null -> collector.add(v.boolean) - v.intOrNull != null -> collector.add(v.int) - v.longOrNull != null -> collector.add(v.long) - v.doubleOrNull != null -> collector.add(v.double) - v.floatOrNull != null -> collector.add(v.float) - v.jsonPrimitive is JsonNull -> collector.add(null) - } - } - - else -> collector.add(v) - } - } - val column = collector.toColumn(valueColumnName) - val res = if (nanIndices.isNotEmpty()) { - fun DataColumn.updateNaNs(nanValue: C): DataColumn { - var j = 0 - var nextNanIndex = nanIndices[j] - return mapIndexed(column.type) { i, v -> - if (i == nextNanIndex) { - j++ - nextNanIndex = if (j < nanIndices.size) nanIndices[j] else -1 - nanValue - } else v - } - } - when (column.typeClass) { - Double::class -> column.cast().updateNaNs(Double.NaN) - Float::class -> column.cast().updateNaNs(Float.NaN) - String::class -> column.cast().updateNaNs("NaN") - else -> column - } - } else column - listOf(UnnamedColumn(res)) - } - - // Create one column of type FrameColumn, or List<> from all the records if they are all 
arrays - colType == AnyColType.ARRAYS -> { - val values = mutableListOf() - val startIndices = ArrayList() - records.forEach { - startIndices.add(values.size) - when (it) { - is JsonArray -> values.addAll(it) - is JsonNull, null -> Unit - else -> error("Expected JsonArray, got $it") - } - } - val parsed = fromJsonListAnyColumns( - records = values, - keyValuePaths = keyValuePaths, - jsonPath = jsonPath.appendArrayWithWildcard(), - ) - - val res = when { - parsed.isSingleUnnamedColumn() -> { - val col = (parsed.getColumn(0) as UnnamedColumn).col - val elementType = col.type - val values = col.values.asList().splitByIndices(startIndices.asSequence()).toList() - DataColumn.createValueColumn( - name = arrayColumnName, - values = values, - type = List::class.createType(listOf(KTypeProjection.invariant(elementType))), - ) - } - - else -> DataColumn.createFrameColumn( - name = arrayColumnName, // will be erased - df = parsed.unwrapUnnamedColumns(), - startIndices = startIndices, - ) - } - listOf(UnnamedColumn(res)) - } - - // Create one column of type FrameColumn - colType == AnyColType.OBJECTS && isKeyValue -> { - // collect the value types to make sure Value columns with lists and other values aren't all turned into lists - val valueTypes = mutableSetOf() - val dataFrames = records.map { - when (it) { - is JsonObject -> { - val map = it.mapValues { (key, value) -> - val parsed = fromJsonListAnyColumns( - records = listOf(value), - keyValuePaths = keyValuePaths, - jsonPath = jsonPath.append(key), - ) - if (parsed.isSingleUnnamedColumn()) (parsed.getColumn(0) as UnnamedColumn).col.values.first() - else parsed.unwrapUnnamedColumns().firstOrNull() - } - val valueType = map.values.map { - guessValueType(sequenceOf(it)) - }.commonType() - - valueTypes += valueType - - dataFrameOf( - columnOf(*map.keys.toTypedArray()).named(KeyValueProperty<*>::key.name), - createColumn(values = map.values, suggestedType = valueType, guessType = false) - 
.named(KeyValueProperty<*>::value.name), - ) - } - - is JsonNull, null -> DataFrame.emptyOf() - else -> error("Expected JsonObject, got $it") - } - } - - val valueColumns = dataFrames.map { it[KeyValueProperty<*>::value.name] } - val valueColumnSchema = when { - // in these cases we can safely combine the columns to get a single column schema - valueColumns.all { it is ColumnGroup<*> } || valueColumns.all { it is FrameColumn<*> } -> - valueColumns.concat().extractSchema() - // to avoid listification, we create the value columns schema ourselves (https://github.com/Kotlin/dataframe/issues/184) - else -> ColumnSchema.Value(valueTypes.commonType()) - } - - listOf( - UnnamedColumn( - DataColumn.createFrameColumn( - name = valueColumnName, // will be erased unless at top-level - groups = dataFrames, - schema = lazy { - DataFrameSchemaImpl( - columns = mapOf( - KeyValueProperty<*>::key.name to ColumnSchema.Value(typeOf()), - KeyValueProperty<*>::value.name to valueColumnSchema, - ) - ) - }, - ) - ) - ) - } - - // Create multiple columns from all the records if they are all objects, merging the objects in essence - colType == AnyColType.OBJECTS && !isKeyValue -> { - nameGenerator.names.map { colName -> - val values = ArrayList(records.size) - - records.forEach { - when (it) { - is JsonObject -> values.add(it[colName]) - is JsonNull, null -> values.add(null) - else -> error("Expected JsonObject, got $it") - } - } - - val parsed = fromJsonListAnyColumns( - records = values, - keyValuePaths = keyValuePaths, - jsonPath = jsonPath.append(colName), - ) - when { - parsed.ncol == 0 -> - DataColumn.createValueColumn( - name = colName, - values = arrayOfNulls(values.size).toList(), - type = typeOf(), - ) - - parsed.isSingleUnnamedColumn() -> - (parsed.getColumn(0) as UnnamedColumn).col.rename(colName) - - else -> - DataColumn.createColumnGroup(colName, parsed.unwrapUnnamedColumns()) as AnyCol - } - } - } - - else -> error("") - } - - return when { - columns.isEmpty() -> 
DataFrame.empty(records.size) - - columns.size == 1 && hasArray && header.isNotEmpty() && columns[0].typeClass == List::class -> - columns[0] - .cast>() - .splitInto(*header.toTypedArray()) - - else -> columns.toDataFrame() - } -} - -public const val arrayColumnName: String = "array" -public const val valueColumnName: String = "value" - -private fun AnyFrame.isSingleUnnamedColumn() = ncol == 1 && getColumn(0) is UnnamedColumn - -/** - * Json to DataFrame converter that creates allows creates `value` and `array` accessors - * instead of [Any] columns. - * A.k.a. [TypeClashTactic.ARRAY_AND_VALUE_COLUMNS]. - * - * @param records List of json elements to be converted to a [DataFrame]. - * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]> - * will be created. - * @param header Optional list of column names. If given, [records] will be read like an object with [header] being the keys. - * @return [DataFrame] from the given [records]. - */ -internal fun fromJsonListArrayAndValueColumns( - records: List<*>, - keyValuePaths: List = emptyList(), - header: List = emptyList(), - jsonPath: JsonPath = JsonPath(), -): AnyFrame { - var hasPrimitive = false - var hasArray = false - val isKeyValue = keyValuePaths.any { jsonPath.matches(it) } - - // list element type can be JsonObject, JsonArray or primitive - // So first, we gather all properties of objects to merge including "array" and "value" if needed - // so the resulting type of a property with instances 123, ["abc"], and { "a": 1, "b": 2 } will be - // { array: List, value: Int?, a: Int?, b: Int? 
} - // and instances will look like - // { "array": [], "value": 123, "a": null, "b": null } - - val nameGenerator = ColumnNameGenerator() - records.forEach { - when (it) { - is JsonObject -> it.entries.forEach { - nameGenerator.addIfAbsent(it.key) - } - - is JsonArray -> hasArray = true - is JsonNull, null -> Unit - is JsonPrimitive -> hasPrimitive = true - } - } - if (records.all { it == null || it is JsonNull }) hasPrimitive = true - - // Add a value column to the collected names if needed - val valueColumn = if (hasPrimitive || records.isEmpty()) { - nameGenerator.addUnique(valueColumnName) - } else null - - // Add an array column to the collected names if needed - val arrayColumn = if (hasArray) { - nameGenerator.addUnique(arrayColumnName) - } else null - - // only properties that consist of just objects (or are empty) can be merged to key/value FrameColumns - if (isKeyValue && (hasPrimitive || hasArray)) { - error("Key value path $jsonPath does not match objects.") - } - - // Create columns from the collected names - val columns: List = when { - // instead of using the names, generate a single key/value frame column - isKeyValue -> { - val dataFrames = records.map { - when (it) { - is JsonObject -> { - val map = it.mapValues { (key, value) -> - val parsed = fromJsonListArrayAndValueColumns( - records = listOf(value), - keyValuePaths = keyValuePaths, - jsonPath = jsonPath.append(key), - ) - if (parsed.isSingleUnnamedColumn()) (parsed.getColumn(0) as UnnamedColumn).col.values.first() - else parsed.unwrapUnnamedColumns().firstOrNull() - } - val valueType = - map.values.map { guessValueType(sequenceOf(it)) } - .commonType() - - dataFrameOf( - columnOf(*map.keys.toTypedArray()).named(KeyValueProperty<*>::key.name), - createColumn( - values = map.values, - suggestedType = valueType, - guessType = false, - ).named(KeyValueProperty<*>::value.name), - ) - } - - is JsonNull, null -> DataFrame.emptyOf() - else -> error("Expected JsonObject, got $it") - } - } - - listOf( 
- UnnamedColumn( - DataColumn.createFrameColumn( - name = valueColumnName, // will be erased unless at top-level - groups = dataFrames, - schema = lazy { - dataFrames.mapNotNull { it.takeIf { it.nrow > 0 }?.schema() }.intersectSchemas() - }, - ) - ) - ) - } - - // generate columns using the collected names - else -> - nameGenerator.names.map { colName -> - when { - // Collect primitive values from records into the `value` column if needed - colName == valueColumn && (hasPrimitive || records.isEmpty()) -> { - val collector = createDataCollector(records.size) - val nanIndices = mutableListOf() - records.forEachIndexed { i, v -> - when (v) { - is JsonObject -> collector.add(null) - is JsonArray -> collector.add(null) - is JsonPrimitive -> { - when { - v.content == "NaN" -> { - nanIndices.add(i) - collector.add(null) - } - - v.isString -> collector.add(v.content) - v.booleanOrNull != null -> collector.add(v.boolean) - v.intOrNull != null -> collector.add(v.int) - v.longOrNull != null -> collector.add(v.long) - v.doubleOrNull != null -> collector.add(v.double) - v.floatOrNull != null -> collector.add(v.float) - v is JsonNull -> collector.add(null) - else -> collector.add(v) - } - } - - else -> collector.add(v) - } - } - val column = collector.toColumn(colName) - val res = if (nanIndices.isNotEmpty()) { - fun DataColumn.updateNaNs(nanValue: C): DataColumn { - var j = 0 - var nextNanIndex = nanIndices[j] - return mapIndexed(column.type) { i, v -> - if (i == nextNanIndex) { - j++ - nextNanIndex = if (j < nanIndices.size) nanIndices[j] else -1 - nanValue - } else v - } - } - when (column.typeClass) { - Double::class -> column.cast().updateNaNs(Double.NaN) - Float::class -> column.cast().updateNaNs(Float.NaN) - String::class -> column.cast().updateNaNs("NaN") - else -> column - } - } else column - UnnamedColumn(res) - } - - // Collect arrays from records into the `array` column if needed - colName == arrayColumn && hasArray -> { - val values = mutableListOf() - val 
startIndices = ArrayList() - records.forEach { - startIndices.add(values.size) - if (it is JsonArray) values.addAll(it.jsonArray) - } - val parsed = fromJsonListArrayAndValueColumns( - records = values, - keyValuePaths = keyValuePaths, - jsonPath = jsonPath.appendArrayWithWildcard(), - ) - - val res = when { - parsed.isSingleUnnamedColumn() -> { - val col = (parsed.getColumn(0) as UnnamedColumn).col - val elementType = col.type - val values = col.values.asList().splitByIndices(startIndices.asSequence()).toList() - DataColumn.createValueColumn( - name = colName, - values = values, - type = List::class.createType(listOf(KTypeProjection.invariant(elementType))), - ) - } - - else -> DataColumn.createFrameColumn(colName, parsed.unwrapUnnamedColumns(), startIndices) - } - UnnamedColumn(res) - } - - // Collect the current column name as property from the objects in records - else -> { - val values = ArrayList(records.size) - records.forEach { - when (it) { - is JsonObject -> values.add(it[colName]) - else -> values.add(null) - } - } - - val parsed = fromJsonListArrayAndValueColumns( - records = values, - keyValuePaths = keyValuePaths, - jsonPath = jsonPath.append(colName), - ) - when { - parsed.ncol == 0 -> - DataColumn.createValueColumn( - name = colName, - values = arrayOfNulls(values.size).toList(), - type = typeOf(), - ) - - parsed.isSingleUnnamedColumn() -> - (parsed.getColumn(0) as UnnamedColumn).col.rename(colName) - - else -> - DataColumn.createColumnGroup(colName, parsed.unwrapUnnamedColumns()) as AnyCol - } - } - } - } - } - - return when { - columns.isEmpty() -> - DataFrame.empty(records.size) - - columns.size == 1 && hasArray && header.isNotEmpty() && columns[0].typeClass == List::class -> - columns[0] - .cast>() - .splitInto(*header.toTypedArray()) - - else -> - columns.toDataFrame() - } -} - -// we need it to check if AnyFrame created by recursive call has single unnamed column, -// unnamed column means this column is not created from field of a record 
[{"value": 1}, {"value": 2}], -// but filtered values [1, { ... }, []] -> [1, null, null] -// or arrays: [1, { ...}, []] -> [null, null, []] -private class UnnamedColumn(val col: DataColumn) : DataColumn by col - -private val valueTypes = - setOf(Boolean::class, Double::class, Int::class, Float::class, Long::class, Short::class, Byte::class) - -@OptIn(ExperimentalSerializationApi::class) -private fun convert(value: Any?): JsonElement = when (value) { - is JsonElement -> value - is Number -> JsonPrimitive(value) - is String -> JsonPrimitive(value) - is Char -> JsonPrimitive(value.toString()) - is Boolean -> JsonPrimitive(value) - null -> JsonPrimitive(null) - else -> JsonPrimitive(value.toString()) -} - -internal fun encodeRow(frame: ColumnsContainer<*>, index: Int): JsonObject { - val values: Map = frame.columns().associate { col -> - col.name to when { - col is ColumnGroup<*> -> encodeRow(col, index) - col is FrameColumn<*> -> encodeFrame(col[index]) - col.isList() -> { - col[index]?.let { - JsonArray((it as List<*>).map { value -> convert(value) }) - } ?: JsonPrimitive(null) - } - - col.typeClass in valueTypes -> { - val v = col[index] - convert(v) - } - - else -> JsonPrimitive(col[index]?.toString()) - } - } - - if (values.isEmpty()) return buildJsonObject { } - return JsonObject(values) -} - -internal fun encodeFrame(frame: AnyFrame): JsonArray { - val allColumns = frame.columns() - - // if there is only 1 column, then `isValidValueColumn` always true. - // But at the same time, we shouldn't treat dataFrameOf("value")(1,2,3) like unnamed column - // because it was created by user. 
- val isPossibleToFindUnnamedColumns = allColumns.size != 1 - val valueColumn = allColumns.filter { it.name.startsWith(valueColumnName) } - .takeIf { isPossibleToFindUnnamedColumns } - ?.maxByOrNull { it.name }?.let { valueCol -> - if (valueCol.kind() != ColumnKind.Value) { // check that value in this column is not null only when other values are null - null - } else { - // check that value in this column is not null only when other values are null - val isValidValueColumn = frame.rows().all { row -> - if (valueCol[row] != null) { - allColumns.all { col -> - if (col.name != valueCol.name) col[row] == null - else true - } - } else true - } - if (isValidValueColumn) valueCol - else null - } - } - - val arrayColumn = allColumns.filter { it.name.startsWith(arrayColumnName) } - .takeIf { isPossibleToFindUnnamedColumns } - ?.maxByOrNull { it.name }?.let { arrayCol -> - if (arrayCol.kind() == ColumnKind.Group) null - else { - // check that value in this column is not null only when other values are null - val isValidArrayColumn = frame.rows().all { row -> - if (arrayCol[row] != null) { - allColumns.all { col -> - if (col.name != arrayCol.name) col[row] == null - else true - } - } else true - } - if (isValidArrayColumn) arrayCol - else null - } - } - - val arraysAreFrames = arrayColumn?.kind() == ColumnKind.Frame - - val data = frame.indices().map { rowIndex -> - valueColumn?.get(rowIndex) ?: arrayColumn?.get(rowIndex) - ?.let { if (arraysAreFrames) encodeFrame(it as AnyFrame) else null } ?: encodeRow( - frame, - rowIndex - ) - } - return buildJsonArray { addAll(data.map { convert(it) }) } -} - public fun AnyFrame.toJson(prettyPrint: Boolean = false): String { val json = Json { this.prettyPrint = prettyPrint @@ -973,29 +272,17 @@ public fun AnyFrame.toJson(prettyPrint: Boolean = false): String { allowSpecialFloatingPointValues = true } return json.encodeToString(JsonElement.serializer(), encodeFrame(this@toJson)) -public fun AnyFrame.toJson(prettyPrint: Boolean = false, 
canonical: Boolean = false): String { - return json { - encodeFrame(this@toJson) - }.toJsonString(prettyPrint, canonical) } -public fun AnyRow.toJson(prettyPrint: Boolean = false): String { - val json = Json { - this.prettyPrint = prettyPrint - isLenient = true - allowSpecialFloatingPointValues = true - } - return json.encodeToString(JsonElement.serializer(), encodeRow(df(), index())) /** * Converts the DataFrame to a JSON string representation with additional metadata about serialized data. - * It is heavily used to implement some integration features in Kotlin Notebook IntellJ IDEA plugin. + * It is heavily used to implement some integration features in Kotlin Notebook IntelliJ IDEA plugin. * * @param rowLimit The maximum number of top-level dataframe rows to include in the output JSON. * @param nestedRowLimit The maximum number of nested frame rows to include in the output JSON. * If null, all rows are included. * Applied for each frame column recursively * @param prettyPrint Specifies whether the output JSON should be formatted with indentation and line breaks. - * @param canonical Specifies whether the output JSON should be in a canonical form. * @param imageEncodingOptions The options for encoding images. The default is null, which indicates that the image is not encoded as Base64. * * @return The DataFrame converted to a JSON string with metadata. @@ -1004,12 +291,17 @@ public fun AnyFrame.toJsonWithMetadata( rowLimit: Int, nestedRowLimit: Int? = null, prettyPrint: Boolean = false, - canonical: Boolean = false, imageEncodingOptions: Base64ImageEncodingOptions? 
= null ): String { - return json { + val json = Json { + this.prettyPrint = prettyPrint + isLenient = true + allowSpecialFloatingPointValues = true + } + return json.encodeToString( + JsonElement.serializer(), encodeDataFrameWithMetadata(this@toJsonWithMetadata, rowLimit, nestedRowLimit, imageEncodingOptions) - }.toJsonString(prettyPrint, canonical) + ) } internal const val DEFAULT_IMG_SIZE = 600 @@ -1037,10 +329,13 @@ public class Base64ImageEncodingOptions( } } -public fun AnyRow.toJson(prettyPrint: Boolean = false, canonical: Boolean = false): String { - return json { - encodeRow(df(), index()) - }?.toJsonString(prettyPrint, canonical) ?: "" +public fun AnyRow.toJson(prettyPrint: Boolean = false): String { + val json = Json { + this.prettyPrint = prettyPrint + isLenient = true + allowSpecialFloatingPointValues = true + } + return json.encodeToString(JsonElement.serializer(), encodeRow(df(), index())) } public fun AnyFrame.writeJson(file: File, prettyPrint: Boolean = false) { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt index f75114ea2b..4d8e91bffe 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt @@ -5,8 +5,6 @@ import kotlinx.serialization.json.addAll import kotlinx.serialization.json.buildJsonObject import kotlinx.serialization.json.put import kotlinx.serialization.json.putJsonArray -import org.jetbrains.kotlinx.dataframe.api.rows -import org.jetbrains.kotlinx.dataframe.api.toDataFrame import org.jetbrains.kotlinx.dataframe.api.take import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.COLUMNS import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME @@ -29,6 +27,7 @@ import org.jetbrains.kotlinx.jupyter.api.MimeTypedResult import 
org.jetbrains.kotlinx.jupyter.api.Notebook import org.jetbrains.kotlinx.jupyter.api.libraries.JupyterIntegration import org.jetbrains.kotlinx.jupyter.api.mimeResult +import org.jetbrains.kotlinx.jupyter.api.outputs.isIsolatedHtml import org.jetbrains.kotlinx.jupyter.api.renderHtmlAsIFrameIfNeeded /** Starting from this version, dataframe integration will respond with additional data for rendering in Kotlin Notebooks plugin. */ @@ -79,14 +78,12 @@ internal inline fun JupyterHtmlRenderer.render( // TODO Do we need to handle the improved meta data here as well? val jsonEncodedDf = when { !ideBuildNumber.supportsDynamicNestedTables() -> { - json { - obj( - NROW to df.size.nrow, - NCOL to df.size.ncol, - COLUMNS to df.columnNames(), - KOTLIN_DATAFRAME to encodeFrame(df.take(limit)), - ) - }.toJsonString() + buildJsonObject { + put(NROW, df.size.nrow) + put(NCOL, df.size.ncol) + putJsonArray(COLUMNS) { addAll(df.columnNames()) } + put(KOTLIN_DATAFRAME, encodeFrame(df.take(limit))) + }.toString() } else -> { @@ -128,7 +125,7 @@ internal fun Notebook.renderAsIFrameAsNeeded( return mimeResult( "text/html" to textHtml, "application/kotlindataframe+json" to jsonEncodedDf - ).also { it.isolatedHtml = false } + ).also { it.isIsolatedHtml = false } } internal fun DataFrameHtmlData.toJupyterHtmlData() = HtmlData(style, body, script) diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt index dae5f687ba..9c3f18912f 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt @@ -1,7 +1,8 @@ package org.jetbrains.kotlinx.dataframe -import com.beust.klaxon.JsonObject -import com.beust.klaxon.Parser +import kotlinx.serialization.json.Json +import kotlinx.serialization.json.JsonObject +import kotlinx.serialization.json.jsonObject import org.jetbrains.kotlinx.dataframe.api.print import 
org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.io.renderToString @@ -27,7 +28,4 @@ fun > T.alsoDebug(println: String? = null, rowsLimit: Int = 20) schema().print() } -fun parseJsonStr(jsonStr: String): JsonObject { - val parser = Parser.default() - return parser.parse(StringBuilder(jsonStr)) as JsonObject -} +fun parseJsonStr(jsonStr: String): JsonObject = Json.parseToJsonElement(jsonStr).jsonObject diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt index cae6e759ce..a6d21d708d 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt @@ -1,9 +1,11 @@ package org.jetbrains.kotlinx.dataframe.io -import com.beust.klaxon.JsonArray -import com.beust.klaxon.JsonObject import io.kotest.matchers.shouldBe import io.kotest.matchers.string.shouldContain +import kotlinx.serialization.json.JsonObject +import kotlinx.serialization.json.jsonArray +import kotlinx.serialization.json.jsonObject +import kotlinx.serialization.json.jsonPrimitive import org.jetbrains.kotlinx.dataframe.api.dataFrameOf import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME import org.jetbrains.kotlinx.dataframe.impl.io.resizeKeepingAspectRatio @@ -35,7 +37,7 @@ class ImageSerializationTests(private val encodingOptions: Base64ImageEncodingOp return } - val decodedImages = decodeImagesFromJson(json, images.size, encodingOptions!!) 
+ val decodedImages = decodeImagesFromJson(json, images.size, encodingOptions) for ((decodedImage, original) in decodedImages.zip(images)) { val expectedImage = resizeIfNeeded(original, encodingOptions) @@ -67,8 +69,8 @@ class ImageSerializationTests(private val encodingOptions: Base64ImageEncodingOp private fun checkImagesEncodedAsToString(json: JsonObject, numImgs: Int) { for (i in 0..)[i] as JsonObject - val img = row["imgs"] as String + val row = json[KOTLIN_DATAFRAME]!!.jsonArray[i].jsonObject + val img = row["imgs"]?.jsonPrimitive?.content img shouldContain "BufferedImage" } @@ -81,8 +83,8 @@ class ImageSerializationTests(private val encodingOptions: Base64ImageEncodingOp ): List { val result = mutableListOf() for (i in 0..)[i] as JsonObject - val imgString = row["imgs"] as String + val row = json[KOTLIN_DATAFRAME]!!.jsonArray[i].jsonObject + val imgString = row["imgs"]!!.jsonPrimitive.content val bytes = decodeBase64Image(imgString, encodingOptions) val decodedImage = createImageFromBytes(bytes) @@ -156,7 +158,8 @@ class ImageSerializationTests(private val encodingOptions: Base64ImageEncodingOp private val DEFAULT = Base64ImageEncodingOptions() private val GZIP_ON_RESIZE_OFF = Base64ImageEncodingOptions(options = GZIP_ON) private val GZIP_OFF_RESIZE_OFF = Base64ImageEncodingOptions(options = ALL_OFF) - private val GZIP_ON_RESIZE_TO_700 = Base64ImageEncodingOptions(imageSizeLimit = 700, options = GZIP_ON or LIMIT_SIZE_ON) + private val GZIP_ON_RESIZE_TO_700 = + Base64ImageEncodingOptions(imageSizeLimit = 700, options = GZIP_ON or LIMIT_SIZE_ON) private val DISABLED = null @JvmStatic diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index 6c4c218f81..6eb1025018 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -1,8 +1,5 @@ package org.jetbrains.kotlinx.dataframe.io 
-import com.beust.klaxon.JsonArray -import com.beust.klaxon.JsonObject -import com.beust.klaxon.Parser import io.kotest.assertions.throwables.shouldNotThrowAny import io.kotest.matchers.collections.shouldBeIn import io.kotest.matchers.shouldBe @@ -10,6 +7,11 @@ import io.kotest.matchers.string.shouldContain import io.kotest.matchers.string.shouldNotContain import io.kotest.matchers.types.instanceOf import io.kotest.matchers.types.shouldBeInstanceOf +import kotlinx.serialization.json.boolean +import kotlinx.serialization.json.int +import kotlinx.serialization.json.jsonArray +import kotlinx.serialization.json.jsonObject +import kotlinx.serialization.json.jsonPrimitive import org.intellij.lang.annotations.Language import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.DataFrame @@ -24,14 +26,13 @@ import org.jetbrains.kotlinx.dataframe.api.forEach import org.jetbrains.kotlinx.dataframe.api.getColumnGroup import org.jetbrains.kotlinx.dataframe.api.getFrameColumn import org.jetbrains.kotlinx.dataframe.api.schema -import org.jetbrains.kotlinx.dataframe.api.toDouble import org.jetbrains.kotlinx.dataframe.api.toFloat import org.jetbrains.kotlinx.dataframe.api.toMap -import org.jetbrains.kotlinx.dataframe.api.with import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.columns.ValueColumn +import org.jetbrains.kotlinx.dataframe.get import org.jetbrains.kotlinx.dataframe.impl.io.SERIALIZATION_VERSION import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.COLUMNS import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.DATA @@ -44,12 +45,11 @@ import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.VERSION import org.jetbrains.kotlinx.dataframe.impl.nothingType import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS import 
org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS +import org.jetbrains.kotlinx.dataframe.parseJsonStr import org.jetbrains.kotlinx.dataframe.testJson import org.jetbrains.kotlinx.dataframe.type import org.jetbrains.kotlinx.dataframe.values import org.junit.Test -import kotlin.math.exp -import kotlin.reflect.* import kotlin.reflect.typeOf class JsonTests { @@ -976,7 +976,6 @@ class JsonTests { } @Test - @Suppress("UNCHECKED_CAST") fun `json with metadata flat table`() { @Language("json") val data = """ @@ -986,24 +985,19 @@ class JsonTests { val jsonStr = df.toJsonWithMetadata(df.rowsCount()).trimIndent() val json = parseJsonStr(jsonStr) - json[VERSION] shouldBe SERIALIZATION_VERSION + json[VERSION]!!.jsonPrimitive.content shouldBe SERIALIZATION_VERSION - val metadata = (json[METADATA] as JsonObject) - metadata[NROW] shouldBe 1 - metadata[NCOL] shouldBe 4 - val columns = metadata[COLUMNS] as List + val metadata = json[METADATA]!!.jsonObject + metadata[NROW]!!.jsonPrimitive.int shouldBe 1 + metadata[NCOL]!!.jsonPrimitive.int shouldBe 4 + val columns = metadata[COLUMNS]!!.jsonArray.map { it.jsonPrimitive.content } columns shouldBe listOf("id", "node_id", "name", "full_name") - val decodedData = json[KOTLIN_DATAFRAME] as JsonArray<*> - val decodedDf = DataFrame.readJsonStr(decodedData.toJsonString()) + val decodedData = json[KOTLIN_DATAFRAME]!!.jsonArray + val decodedDf = DataFrame.readJsonStr(decodedData.toString()) decodedDf shouldBe df } - private fun parseJsonStr(jsonStr: String): JsonObject { - val parser = Parser.default() - return parser.parse(StringBuilder(jsonStr)) as JsonObject - } - @Test fun `json with metadata column group`() { @Language("json") @@ -1014,19 +1008,19 @@ class JsonTests { val jsonStr = df.toJsonWithMetadata(df.rowsCount()).trimIndent() val json = parseJsonStr(jsonStr) - val row = (json[KOTLIN_DATAFRAME] as JsonArray<*>)[0] as JsonObject + val row = json[KOTLIN_DATAFRAME]!!.jsonArray[0].jsonObject - val 
permissions = row["permissions"] as JsonObject - val metadata = permissions[METADATA] as JsonObject - metadata[KIND] shouldBe ColumnKind.Group.toString() + val permissions = row["permissions"]!!.jsonObject + val metadata = permissions[METADATA]!!.jsonObject + metadata[KIND]!!.jsonPrimitive.content shouldBe ColumnKind.Group.toString() - val decodedData = permissions[DATA] as JsonObject + val decodedData = permissions[DATA]!!.jsonObject - decodedData["admin"] shouldBe false - decodedData["maintain"] shouldBe false - decodedData["push"] shouldBe false - decodedData["triage"] shouldBe false - decodedData["pull"] shouldBe true + decodedData["admin"]!!.jsonPrimitive.boolean shouldBe false + decodedData["maintain"]!!.jsonPrimitive.boolean shouldBe false + decodedData["push"]!!.jsonPrimitive.boolean shouldBe false + decodedData["triage"]!!.jsonPrimitive.boolean shouldBe false + decodedData["pull"]!!.jsonPrimitive.boolean shouldBe true } @Test @@ -1034,19 +1028,19 @@ class JsonTests { val df = DataFrame.readJson(testJson("repositories")) val jsonStr = df.toJsonWithMetadata(df.rowsCount()).trimIndent() val json = parseJsonStr(jsonStr) - val row = (json[KOTLIN_DATAFRAME] as JsonArray<*>)[0] as JsonObject + val row = json[KOTLIN_DATAFRAME]!!.jsonArray[0].jsonObject - val contributors = row["contributors"] as JsonObject + val contributors = row["contributors"]!!.jsonObject - val metadata = contributors[METADATA] as JsonObject - metadata[KIND] shouldBe ColumnKind.Frame.toString() - metadata[NCOL] shouldBe 8 - metadata[NROW] shouldBe 29 + val metadata = contributors[METADATA]!!.jsonObject + metadata[KIND]!!.jsonPrimitive.content shouldBe ColumnKind.Frame.toString() + metadata[NCOL]!!.jsonPrimitive.int shouldBe 8 + metadata[NROW]!!.jsonPrimitive.int shouldBe 29 - val decodedData = contributors[DATA] as JsonArray<*> + val decodedData = contributors[DATA]!!.jsonArray decodedData.size shouldBe 29 - val decodedDf = DataFrame.readJsonStr(decodedData.toJsonString()) + val decodedDf = 
DataFrame.readJsonStr(decodedData.toString()) decodedDf shouldBe df[0]["contributors"] as AnyFrame } @@ -1056,16 +1050,16 @@ class JsonTests { val nestedFrameRowLimit = 20 val jsonStr = df.toJsonWithMetadata(df.rowsCount(), nestedFrameRowLimit).trimIndent() val json = parseJsonStr(jsonStr) - val row = (json[KOTLIN_DATAFRAME] as JsonArray<*>)[0] as JsonObject + val row = json[KOTLIN_DATAFRAME]!!.jsonArray[0].jsonObject - val contributors = row["contributors"] as JsonObject + val contributors = row["contributors"]!!.jsonObject - val metadata = contributors[METADATA] as JsonObject - metadata[KIND] shouldBe ColumnKind.Frame.toString() - metadata[NCOL] shouldBe 8 - metadata[NROW] shouldBe 29 + val metadata = contributors[METADATA]!!.jsonObject + metadata[KIND]!!.jsonPrimitive.content shouldBe ColumnKind.Frame.toString() + metadata[NCOL]!!.jsonPrimitive.int shouldBe 8 + metadata[NROW]!!.jsonPrimitive.int shouldBe 29 - val decodedData = contributors[DATA] as JsonArray<*> + val decodedData = contributors[DATA]!!.jsonArray decodedData.size shouldBe nestedFrameRowLimit } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt index 7435f064ad..f544c349b0 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt @@ -1,7 +1,6 @@ package org.jetbrains.kotlinx.dataframe.jupyter import io.kotest.assertions.throwables.shouldNotThrow -import io.kotest.matchers.collections.shouldContain import io.kotest.matchers.comparables.shouldBeGreaterThan import io.kotest.matchers.comparables.shouldBeLessThan import io.kotest.matchers.shouldBe @@ -12,6 +11,7 @@ import kotlinx.serialization.json.JsonArray import kotlinx.serialization.json.JsonObject import kotlinx.serialization.json.int import kotlinx.serialization.json.jsonArray +import 
kotlinx.serialization.json.jsonObject import kotlinx.serialization.json.jsonPrimitive import org.intellij.lang.annotations.Language import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.DATA @@ -119,15 +119,15 @@ class RenderingTests : JupyterReplTestCase() { } private fun assertDataFrameDimensions(json: JsonObject, expectedRows: Int, expectedColumns: Int) { - json.obj(METADATA)!!.int("nrow") shouldBe expectedRows - json.obj(METADATA)!!.int("ncol") shouldBe expectedColumns + json[METADATA]!!.jsonObject["nrow"]!!.jsonPrimitive.int shouldBe expectedRows + json[METADATA]!!.jsonObject["ncol"]!!.jsonPrimitive.int shouldBe expectedColumns } private fun parseDataframeJson(result: MimeTypedResult): JsonObject { return Json.decodeFromString(result["application/kotlindataframe+json"]!!) } - private fun JsonArray.getObj(index: Int) = this[index] as JsonObject + private fun JsonArray.getObj(index: Int) = this[index].jsonObject @Test fun `test kotlin notebook plugin utils sort by one column asc`() { @@ -228,7 +228,6 @@ class RenderingTests : JupyterReplTestCase() { df.group(col1, col2).into("group") """.trimIndent() ) - val jsonOutput = json.toJsonString(prettyPrint = true) val expectedOutput = """ { "${'$'}version": "2.1.0", @@ -362,7 +361,7 @@ class RenderingTests : JupyterReplTestCase() { }] } """.trimIndent() - jsonOutput shouldBe expectedOutput + json shouldBe Json.parseToJsonElement(expectedOutput) } @Test @@ -378,8 +377,8 @@ class RenderingTests : JupyterReplTestCase() { assertDataFrameDimensions(json, 2, 2) val rows = json[KOTLIN_DATAFRAME]!!.jsonArray - rows.getObj(0).get("group1")!!.jsonArray.size shouldBe 50 - rows.getObj(1).get("group1")!!.jsonArray.size shouldBe 50 + rows.getObj(0)["group1"]!!.jsonObject[DATA]!!.jsonArray.size shouldBe 10 + rows.getObj(1)["group1"]!!.jsonObject[DATA]!!.jsonArray.size shouldBe 10 } // Regression KTNB-424 diff --git a/dataframe-openapi/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/DefaultReadOpenApiMethod.kt 
b/dataframe-openapi/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/DefaultReadOpenApiMethod.kt index 90f8c6b03d..c523064a0e 100644 --- a/dataframe-openapi/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/DefaultReadOpenApiMethod.kt +++ b/dataframe-openapi/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/DefaultReadOpenApiMethod.kt @@ -21,6 +21,8 @@ import java.io.InputStream import java.net.URL import kotlin.reflect.typeOf +private const val valueColumnName: String = "value" + /** * Used to add `readJson` and `convertToMyMarker` functions to the generated interfaces. * Makes sure [convertDataRowsWithOpenApi] is always used in conversions. From 22bb293f5c6fd7af46a094908327d59365b2418a Mon Sep 17 00:00:00 2001 From: devcrocod Date: Wed, 12 Jun 2024 13:29:52 +0200 Subject: [PATCH 05/10] Update serialization library version and improve comments --- .../org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt | 4 ++-- .../org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt | 1 - gradle/libs.versions.toml | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt index 51230cc615..1acb76cc5d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt @@ -167,7 +167,7 @@ internal fun fromJsonListAnyColumns( @Suppress("KotlinConstantConditions") val columns: List = when { - // Create one column of type Any? (or guessed a primitive type) from all the records + // Create one column of type Any? 
(or guessed primitive type) from all the records colType == AnyColType.ANY -> { val collector: DataCollectorBase = if (justPrimitives) createDataCollector(records.size) // guess the type @@ -419,7 +419,7 @@ internal fun fromJsonListArrayAndValueColumns( // list element type can be JsonObject, JsonArray or primitive // So first, we gather all properties of objects to merge including "array" and "value" if needed - // so the resulting type of property with instances 123, ["abc"], and { "a": 1, "b": 2 } will be + // so the resulting type of a property with instances 123, ["abc"], and { "a": 1, "b": 2 } will be // { array: List, value: Int?, a: Int?, b: Int? } // and instances will look like // { "array": [], "value": 123, "a": null, "b": null } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt index c5d38f922d..21d6db85f3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt @@ -156,7 +156,6 @@ internal fun encodeRowWithMetadata( }.let { col.name to it } } if (values.isEmpty()) return null - JsonObject(mapOf("exampleKey" to JsonPrimitive("exampleValue"))) return JsonObject(values.toMap()) } diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index db0bdb0925..62f595dcdd 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -26,7 +26,7 @@ kover = "0.6.1" commonsCsv = "1.10.0" commonsCompress = "1.26.0" -serialization = "1.6.2" +serialization = "1.7.0" fuel = "2.3.1" poi = "5.2.5" mariadb = "3.3.2" From 79c294eda0c3e5b23c103b3d9bbf49ac79e989db Mon Sep 17 00:00:00 2001 From: devcrocod Date: Wed, 12 Jun 2024 15:15:04 +0200 Subject: [PATCH 06/10] Refactor stream handling in guess read --- .../org/jetbrains/kotlinx/dataframe/io/guess.kt | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git 
a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess.kt index 1525c14461..8979590e56 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess.kt @@ -206,21 +206,15 @@ internal fun DataFrame.Companion.read( formats: List = supportedFormats.filterIsInstance(), ): ReadAnyFrame { if (format != null) return format to format.readDataFrame(stream, header = header) - val input = NotCloseableStream(if (stream.markSupported()) stream else BufferedInputStream(stream)) - try { - val readLimit = 10000 - input.mark(readLimit) - + stream.use { input -> + val byteArray = input.readBytes() // read the whole stream into memory so each format attempt gets a fresh input formats.sortedBy { it.testOrder }.forEach { try { - input.reset() - return it to it.readDataFrame(input, header = header) - } catch (e: Exception) { + return it to it.readDataFrame(byteArray.inputStream(), header = header) + } catch (_: Exception) { } } throw IllegalArgumentException("Unknown stream format") - } finally { - input.doClose() } } From cf43401ecdcda97779b31d681239058cc49050e9 Mon Sep 17 00:00:00 2001 From: devcrocod Date: Thu, 13 Jun 2024 14:52:32 +0200 Subject: [PATCH 07/10] Refactor json element matching and little refactor encodeFrame --- .../org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt | 7 +++---- .../org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt | 8 +++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt index 1acb76cc5d..a61b23cc2a 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt @@ -13,7 +13,6 @@ import kotlinx.serialization.json.floatOrNull import kotlinx.serialization.json.int import
kotlinx.serialization.json.intOrNull import kotlinx.serialization.json.jsonArray -import kotlinx.serialization.json.jsonPrimitive import kotlinx.serialization.json.long import kotlinx.serialization.json.longOrNull import org.jetbrains.kotlinx.dataframe.AnyCol @@ -201,6 +200,8 @@ internal fun fromJsonListAnyColumns( ) } + is JsonNull -> collector.add(null) + is JsonPrimitive -> { when { v.content == "NaN" -> { @@ -214,7 +215,6 @@ internal fun fromJsonListAnyColumns( v.longOrNull != null -> collector.add(v.long) v.doubleOrNull != null -> collector.add(v.double) v.floatOrNull != null -> collector.add(v.float) - v.jsonPrimitive is JsonNull -> collector.add(null) } } @@ -513,6 +513,7 @@ internal fun fromJsonListArrayAndValueColumns( when (v) { is JsonObject -> collector.add(null) is JsonArray -> collector.add(null) + is JsonNull -> collector.add(null) is JsonPrimitive -> { when { v.content == "NaN" -> { @@ -526,8 +527,6 @@ internal fun fromJsonListArrayAndValueColumns( v.longOrNull != null -> collector.add(v.long) v.doubleOrNull != null -> collector.add(v.double) v.floatOrNull != null -> collector.add(v.float) - v is JsonNull -> collector.add(null) - else -> collector.add(v) } } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt index 21d6db85f3..174cbbfaf2 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt @@ -299,9 +299,11 @@ internal fun encodeFrame(frame: AnyFrame): JsonArray { val arraysAreFrames = arrayColumn?.kind() == ColumnKind.Frame val data = frame.indices().map { rowIndex -> - valueColumn?.get(rowIndex) ?: arrayColumn?.get(rowIndex)?.let { - if (arraysAreFrames) encodeFrame(it as AnyFrame) else null - } ?: encodeRow(frame, rowIndex) + when { + valueColumn != null -> valueColumn[rowIndex] + arrayColumn != null -> 
arrayColumn[rowIndex]?.let { if (arraysAreFrames) encodeFrame(it as AnyFrame) else null } + else -> encodeRow(frame, rowIndex) + } } return buildJsonArray { addAll(data.map { convert(it) }) } From eda1884f407e8e553bc2690f01483aed5bfe5dc4 Mon Sep 17 00:00:00 2001 From: devcrocod Date: Thu, 13 Jun 2024 19:57:54 +0200 Subject: [PATCH 08/10] Generate new test files in a plugin --- .../testData/box/read.fir.ir.txt | 104 ++++++++++++++++-- .../testData/box/read.fir.txt | 35 +++++- .../testData/box/readCSV.fir.ir.txt | 104 ++++++++++++++++-- .../testData/box/readCSV.fir.txt | 35 +++++- .../testData/box/toDataFrame.fir.ir.txt | 4 + .../testData/box/toDataFrame_dsl.fir.ir.txt | 4 + 6 files changed, 250 insertions(+), 36 deletions(-) diff --git a/plugins/kotlin-dataframe/testData/box/read.fir.ir.txt b/plugins/kotlin-dataframe/testData/box/read.fir.ir.txt index 347ed4e3e3..bba13c355b 100644 --- a/plugins/kotlin-dataframe/testData/box/read.fir.ir.txt +++ b/plugins/kotlin-dataframe/testData/box/read.fir.ir.txt @@ -50,7 +50,7 @@ FILE fqName:org.jetbrains.kotlinx.dataframe fileName:/read.kt PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.TokenGenerator.Key] name:topics visibility:public modality:ABSTRACT [val] annotations: Order(order = 3) - FUN DEFAULT_PROPERTY_ACCESSOR name: visibility:public modality:ABSTRACT <> ($this:org.jetbrains.kotlinx.dataframe.box..Read_16I) returnType:kotlin.String + FUN DEFAULT_PROPERTY_ACCESSOR name: visibility:public modality:ABSTRACT <> ($this:org.jetbrains.kotlinx.dataframe.box..Read_16I) returnType:org.jetbrains.kotlinx.dataframe.DataFrame.Topics_391> correspondingProperty: PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.TokenGenerator.Key] name:topics visibility:public modality:ABSTRACT [val] $this: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.box..Read_16I PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.TokenGenerator.Key] name:watchers visibility:public 
modality:ABSTRACT [val] @@ -90,30 +90,30 @@ FILE fqName:org.jetbrains.kotlinx.dataframe fileName:/read.kt $this: GET_VAR ': org.jetbrains.kotlinx.dataframe.ColumnsContainer.Read_16I> declared in org.jetbrains.kotlinx.dataframe.box..Scope0.' type=org.jetbrains.kotlinx.dataframe.ColumnsContainer.Read_16I> origin=null columnName: CONST String type=kotlin.String value="full_name" PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name:topics visibility:public modality:FINAL [val] - FIELD PROPERTY_BACKING_FIELD name:topics type:kotlin.String visibility:private [final] - FUN GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name: visibility:public modality:FINAL <> ($this:org.jetbrains.kotlinx.dataframe.box..Scope0, $receiver:org.jetbrains.kotlinx.dataframe.DataRow.Read_16I>) returnType:kotlin.String + FIELD PROPERTY_BACKING_FIELD name:topics type:org.jetbrains.kotlinx.dataframe.DataFrame.Topics_391> visibility:private [final] + FUN GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name: visibility:public modality:FINAL <> ($this:org.jetbrains.kotlinx.dataframe.box..Scope0, $receiver:org.jetbrains.kotlinx.dataframe.DataRow.Read_16I>) returnType:org.jetbrains.kotlinx.dataframe.DataFrame.Topics_391> annotations: JvmName(name = "Read_16I_topics") correspondingProperty: PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name:topics visibility:public modality:FINAL [val] $this: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.box..Scope0 $receiver: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.DataRow.Read_16I> BLOCK_BODY - RETURN type=kotlin.String from='public final fun (): kotlin.String declared in org.jetbrains.kotlinx.dataframe.box..Scope0' - TYPE_OP type=kotlin.String origin=CAST typeOperand=kotlin.String + RETURN type=org.jetbrains.kotlinx.dataframe.DataFrame.Topics_391> from='public final fun (): 
org.jetbrains.kotlinx.dataframe.DataFrame.Topics_391> declared in org.jetbrains.kotlinx.dataframe.box..Scope0' + TYPE_OP type=org.jetbrains.kotlinx.dataframe.DataFrame.Topics_391> origin=CAST typeOperand=org.jetbrains.kotlinx.dataframe.DataFrame.Topics_391> CALL 'public abstract fun get (name: kotlin.String): kotlin.Any? declared in org.jetbrains.kotlinx.dataframe.DataRow' type=kotlin.Any? origin=null $this: GET_VAR ': org.jetbrains.kotlinx.dataframe.DataRow.Read_16I> declared in org.jetbrains.kotlinx.dataframe.box..Scope0.' type=org.jetbrains.kotlinx.dataframe.DataRow.Read_16I> origin=null name: CONST String type=kotlin.String value="topics" PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name:topics visibility:public modality:FINAL [val] - FIELD PROPERTY_BACKING_FIELD name:topics type:org.jetbrains.kotlinx.dataframe.DataColumn visibility:private [final] - FUN GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name: visibility:public modality:FINAL <> ($this:org.jetbrains.kotlinx.dataframe.box..Scope0, $receiver:org.jetbrains.kotlinx.dataframe.ColumnsContainer.Read_16I>) returnType:org.jetbrains.kotlinx.dataframe.DataColumn + FIELD PROPERTY_BACKING_FIELD name:topics type:org.jetbrains.kotlinx.dataframe.DataColumn.Topics_391>> visibility:private [final] + FUN GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name: visibility:public modality:FINAL <> ($this:org.jetbrains.kotlinx.dataframe.box..Scope0, $receiver:org.jetbrains.kotlinx.dataframe.ColumnsContainer.Read_16I>) returnType:org.jetbrains.kotlinx.dataframe.DataColumn.Topics_391>> annotations: JvmName(name = "Read_16I_topics") correspondingProperty: PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name:topics visibility:public modality:FINAL [val] $this: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.box..Scope0 $receiver: VALUE_PARAMETER name: 
type:org.jetbrains.kotlinx.dataframe.ColumnsContainer.Read_16I> BLOCK_BODY - RETURN type=org.jetbrains.kotlinx.dataframe.DataColumn from='public final fun (): org.jetbrains.kotlinx.dataframe.DataColumn declared in org.jetbrains.kotlinx.dataframe.box..Scope0' - TYPE_OP type=org.jetbrains.kotlinx.dataframe.DataColumn origin=CAST typeOperand=org.jetbrains.kotlinx.dataframe.DataColumn + RETURN type=org.jetbrains.kotlinx.dataframe.DataColumn.Topics_391>> from='public final fun (): org.jetbrains.kotlinx.dataframe.DataColumn.Topics_391>> declared in org.jetbrains.kotlinx.dataframe.box..Scope0' + TYPE_OP type=org.jetbrains.kotlinx.dataframe.DataColumn.Topics_391>> origin=CAST typeOperand=org.jetbrains.kotlinx.dataframe.DataColumn.Topics_391>> CALL 'public open fun get (columnName: kotlin.String): org.jetbrains.kotlinx.dataframe.DataColumn<*> declared in org.jetbrains.kotlinx.dataframe.ColumnsContainer' type=kotlin.Any? origin=null $this: GET_VAR ': org.jetbrains.kotlinx.dataframe.ColumnsContainer.Read_16I> declared in org.jetbrains.kotlinx.dataframe.box..Scope0.' 
type=org.jetbrains.kotlinx.dataframe.ColumnsContainer.Read_16I> origin=null columnName: CONST String type=kotlin.String value="topics" @@ -218,6 +218,78 @@ FILE fqName:org.jetbrains.kotlinx.dataframe fileName:/read.kt overridden: public open fun toString (): kotlin.String declared in kotlin.Any $this: VALUE_PARAMETER name: type:kotlin.Any + CLASS CLASS name:Topics_391 modality:ABSTRACT visibility:local superTypes:[kotlin.Any] + $this: VALUE_PARAMETER INSTANCE_RECEIVER name: type:org.jetbrains.kotlinx.dataframe.box..Topics_391 + CONSTRUCTOR GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] visibility:public <> () returnType:org.jetbrains.kotlinx.dataframe.box..Topics_391 [primary] + BLOCK_BODY + DELEGATING_CONSTRUCTOR_CALL 'public constructor () declared in kotlin.Any' + INSTANCE_INITIALIZER_CALL classDescriptor='CLASS CLASS name:Topics_391 modality:ABSTRACT visibility:local superTypes:[kotlin.Any]' + FUN FAKE_OVERRIDE name:equals visibility:public modality:OPEN <> ($this:kotlin.Any, other:kotlin.Any?) returnType:kotlin.Boolean [fake_override,operator] + overridden: + public open fun equals (other: kotlin.Any?): kotlin.Boolean declared in kotlin.Any + $this: VALUE_PARAMETER name: type:kotlin.Any + VALUE_PARAMETER name:other index:0 type:kotlin.Any? 
+ FUN FAKE_OVERRIDE name:hashCode visibility:public modality:OPEN <> ($this:kotlin.Any) returnType:kotlin.Int [fake_override] + overridden: + public open fun hashCode (): kotlin.Int declared in kotlin.Any + $this: VALUE_PARAMETER name: type:kotlin.Any + FUN FAKE_OVERRIDE name:toString visibility:public modality:OPEN <> ($this:kotlin.Any) returnType:kotlin.String [fake_override] + overridden: + public open fun toString (): kotlin.String declared in kotlin.Any + $this: VALUE_PARAMETER name: type:kotlin.Any + PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.TokenGenerator.Key] name:value visibility:public modality:ABSTRACT [val] + annotations: + Order(order = 0) + FUN DEFAULT_PROPERTY_ACCESSOR name: visibility:public modality:ABSTRACT <> ($this:org.jetbrains.kotlinx.dataframe.box..Topics_391) returnType:kotlin.Double + correspondingProperty: PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.TokenGenerator.Key] name:value visibility:public modality:ABSTRACT [val] + $this: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.box..Topics_391 + CLASS CLASS name:Scope1 modality:FINAL visibility:local superTypes:[kotlin.Any] + $this: VALUE_PARAMETER INSTANCE_RECEIVER name: type:org.jetbrains.kotlinx.dataframe.box..Scope1 + PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name:value visibility:public modality:FINAL [val] + FIELD PROPERTY_BACKING_FIELD name:value type:kotlin.Double visibility:private [final] + FUN GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name: visibility:public modality:FINAL <> ($this:org.jetbrains.kotlinx.dataframe.box..Scope1, $receiver:org.jetbrains.kotlinx.dataframe.DataRow.Topics_391>) returnType:kotlin.Double + annotations: + JvmName(name = "Topics_391_value") + correspondingProperty: PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name:value visibility:public modality:FINAL [val] + $this: 
VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.box..Scope1 + $receiver: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.DataRow.Topics_391> + BLOCK_BODY + RETURN type=kotlin.Double from='public final fun (): kotlin.Double declared in org.jetbrains.kotlinx.dataframe.box..Scope1' + TYPE_OP type=kotlin.Double origin=CAST typeOperand=kotlin.Double + CALL 'public abstract fun get (name: kotlin.String): kotlin.Any? declared in org.jetbrains.kotlinx.dataframe.DataRow' type=kotlin.Any? origin=null + $this: GET_VAR ': org.jetbrains.kotlinx.dataframe.DataRow.Topics_391> declared in org.jetbrains.kotlinx.dataframe.box..Scope1.' type=org.jetbrains.kotlinx.dataframe.DataRow.Topics_391> origin=null + name: CONST String type=kotlin.String value="value" + PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name:value visibility:public modality:FINAL [val] + FIELD PROPERTY_BACKING_FIELD name:value type:org.jetbrains.kotlinx.dataframe.DataColumn visibility:private [final] + FUN GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name: visibility:public modality:FINAL <> ($this:org.jetbrains.kotlinx.dataframe.box..Scope1, $receiver:org.jetbrains.kotlinx.dataframe.ColumnsContainer.Topics_391>) returnType:org.jetbrains.kotlinx.dataframe.DataColumn + annotations: + JvmName(name = "Topics_391_value") + correspondingProperty: PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name:value visibility:public modality:FINAL [val] + $this: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.box..Scope1 + $receiver: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.ColumnsContainer.Topics_391> + BLOCK_BODY + RETURN type=org.jetbrains.kotlinx.dataframe.DataColumn from='public final fun (): org.jetbrains.kotlinx.dataframe.DataColumn declared in org.jetbrains.kotlinx.dataframe.box..Scope1' + TYPE_OP type=org.jetbrains.kotlinx.dataframe.DataColumn origin=CAST 
typeOperand=org.jetbrains.kotlinx.dataframe.DataColumn + CALL 'public open fun get (columnName: kotlin.String): org.jetbrains.kotlinx.dataframe.DataColumn<*> declared in org.jetbrains.kotlinx.dataframe.ColumnsContainer' type=kotlin.Any? origin=null + $this: GET_VAR ': org.jetbrains.kotlinx.dataframe.ColumnsContainer.Topics_391> declared in org.jetbrains.kotlinx.dataframe.box..Scope1.' type=org.jetbrains.kotlinx.dataframe.ColumnsContainer.Topics_391> origin=null + columnName: CONST String type=kotlin.String value="value" + CONSTRUCTOR GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] visibility:public <> () returnType:org.jetbrains.kotlinx.dataframe.box..Scope1 [primary] + BLOCK_BODY + DELEGATING_CONSTRUCTOR_CALL 'public constructor () declared in kotlin.Any' + INSTANCE_INITIALIZER_CALL classDescriptor='CLASS CLASS name:Scope1 modality:FINAL visibility:local superTypes:[kotlin.Any]' + FUN FAKE_OVERRIDE name:equals visibility:public modality:OPEN <> ($this:kotlin.Any, other:kotlin.Any?) returnType:kotlin.Boolean [fake_override,operator] + overridden: + public open fun equals (other: kotlin.Any?): kotlin.Boolean declared in kotlin.Any + $this: VALUE_PARAMETER name: type:kotlin.Any + VALUE_PARAMETER name:other index:0 type:kotlin.Any? 
+ FUN FAKE_OVERRIDE name:hashCode visibility:public modality:OPEN <> ($this:kotlin.Any) returnType:kotlin.Int [fake_override] + overridden: + public open fun hashCode (): kotlin.Int declared in kotlin.Any + $this: VALUE_PARAMETER name: type:kotlin.Any + FUN FAKE_OVERRIDE name:toString visibility:public modality:OPEN <> ($this:kotlin.Any) returnType:kotlin.String [fake_override] + overridden: + public open fun toString (): kotlin.String declared in kotlin.Any + $this: VALUE_PARAMETER name: type:kotlin.Any CLASS CLASS name:Read_16 modality:ABSTRACT visibility:local superTypes:[org.jetbrains.kotlinx.dataframe.box..Read_16I] $this: VALUE_PARAMETER INSTANCE_RECEIVER name: type:org.jetbrains.kotlinx.dataframe.box..Read_16 CONSTRUCTOR GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] visibility:public <> () returnType:org.jetbrains.kotlinx.dataframe.box..Read_16 [primary] @@ -271,11 +343,11 @@ FILE fqName:org.jetbrains.kotlinx.dataframe fileName:/read.kt annotations: Order(order = 3) overridden: - public abstract topics: kotlin.String declared in org.jetbrains.kotlinx.dataframe.box..Read_16I - FUN FAKE_OVERRIDE name: visibility:public modality:ABSTRACT <> ($this:org.jetbrains.kotlinx.dataframe.box..Read_16I) returnType:kotlin.String [fake_override] + public abstract topics: org.jetbrains.kotlinx.dataframe.DataFrame.Topics_391> declared in org.jetbrains.kotlinx.dataframe.box..Read_16I + FUN FAKE_OVERRIDE name: visibility:public modality:ABSTRACT <> ($this:org.jetbrains.kotlinx.dataframe.box..Read_16I) returnType:org.jetbrains.kotlinx.dataframe.DataFrame.Topics_391> [fake_override] correspondingProperty: PROPERTY FAKE_OVERRIDE name:topics visibility:public modality:ABSTRACT [fake_override,val] overridden: - public abstract fun (): kotlin.String declared in org.jetbrains.kotlinx.dataframe.box..Read_16I + public abstract fun (): org.jetbrains.kotlinx.dataframe.DataFrame.Topics_391> declared in org.jetbrains.kotlinx.dataframe.box..Read_16I $this: 
VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.box..Read_16I PROPERTY FAKE_OVERRIDE name:watchers visibility:public modality:ABSTRACT [fake_override,val] annotations: @@ -295,6 +367,14 @@ FILE fqName:org.jetbrains.kotlinx.dataframe fileName:/read.kt correspondingProperty: PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.TokenGenerator.Key] name:scope0 visibility:public modality:ABSTRACT [var] $this: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.box..Read_16 VALUE_PARAMETER name: index:0 type:org.jetbrains.kotlinx.dataframe.box..Scope0 + PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.TokenGenerator.Key] name:scope1 visibility:public modality:ABSTRACT [var] + FUN DEFAULT_PROPERTY_ACCESSOR name: visibility:public modality:ABSTRACT <> ($this:org.jetbrains.kotlinx.dataframe.box..Read_16) returnType:org.jetbrains.kotlinx.dataframe.box..Scope1 + correspondingProperty: PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.TokenGenerator.Key] name:scope1 visibility:public modality:ABSTRACT [var] + $this: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.box..Read_16 + FUN DEFAULT_PROPERTY_ACCESSOR name: visibility:public modality:ABSTRACT <> ($this:org.jetbrains.kotlinx.dataframe.box..Read_16, :org.jetbrains.kotlinx.dataframe.box..Scope1) returnType:kotlin.Unit + correspondingProperty: PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.TokenGenerator.Key] name:scope1 visibility:public modality:ABSTRACT [var] + $this: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.box..Read_16 + VALUE_PARAMETER name: index:0 type:org.jetbrains.kotlinx.dataframe.box..Scope1 RETURN type=kotlin.Nothing from='local final fun (it: org.jetbrains.kotlinx.dataframe.DataFrame.Companion): org.jetbrains.kotlinx.dataframe.DataFrame.Read_16> declared in org.jetbrains.kotlinx.dataframe.box' CALL 'public final fun read (path: kotlin.String, header: kotlin.collections.List): 
org.jetbrains.kotlinx.dataframe.DataFrame<*> declared in org.jetbrains.kotlinx.dataframe.io' type=org.jetbrains.kotlinx.dataframe.DataFrame.Read_16> origin=null $receiver: GET_VAR 'it: org.jetbrains.kotlinx.dataframe.DataFrame.Companion declared in org.jetbrains.kotlinx.dataframe.box.' type=org.jetbrains.kotlinx.dataframe.DataFrame.Companion origin=null diff --git a/plugins/kotlin-dataframe/testData/box/read.fir.txt b/plugins/kotlin-dataframe/testData/box/read.fir.txt index 6f66eb98a4..fc2ff0b90d 100644 --- a/plugins/kotlin-dataframe/testData/box/read.fir.txt +++ b/plugins/kotlin-dataframe/testData/box/read.fir.txt @@ -7,8 +7,8 @@ FILE: read.kt @R|org/jetbrains/kotlinx/dataframe/annotations/Order|(order = Int(0)) public abstract val full_name: R|kotlin/String| public get(): R|kotlin/String| - @R|org/jetbrains/kotlinx/dataframe/annotations/Order|(order = Int(3)) public abstract val topics: R|kotlin/String| - public get(): R|kotlin/String| + @R|org/jetbrains/kotlinx/dataframe/annotations/Order|(order = Int(3)) public abstract val topics: R|org/jetbrains/kotlinx/dataframe/DataFrame</Topics_391>| + public get(): R|org/jetbrains/kotlinx/dataframe/DataFrame</Topics_391>| @R|org/jetbrains/kotlinx/dataframe/annotations/Order|(order = Int(4)) public abstract val watchers: R|kotlin/Int| public get(): R|kotlin/Int| @@ -30,11 +30,11 @@ FILE: read.kt public final val R|org/jetbrains/kotlinx/dataframe/ColumnsContainer</Read_16I>|.full_name: R|org/jetbrains/kotlinx/dataframe/DataColumn| public get(): R|org/jetbrains/kotlinx/dataframe/DataColumn| - public final val R|org/jetbrains/kotlinx/dataframe/DataRow</Read_16I>|.topics: R|kotlin/String| - public get(): R|kotlin/String| + public final val R|org/jetbrains/kotlinx/dataframe/DataRow</Read_16I>|.topics: R|org/jetbrains/kotlinx/dataframe/DataFrame</Topics_391>| + public get(): R|org/jetbrains/kotlinx/dataframe/DataFrame</Topics_391>| - public final val R|org/jetbrains/kotlinx/dataframe/ColumnsContainer</Read_16I>|.topics: 
R|org/jetbrains/kotlinx/dataframe/DataColumn| - public get(): R|org/jetbrains/kotlinx/dataframe/DataColumn| + public final val R|org/jetbrains/kotlinx/dataframe/ColumnsContainer</Read_16I>|.topics: R|org/jetbrains/kotlinx/dataframe/DataColumn/Topics_391>>| + public get(): R|org/jetbrains/kotlinx/dataframe/DataColumn/Topics_391>>| public final val R|org/jetbrains/kotlinx/dataframe/DataRow</Read_16I>|.watchers: R|kotlin/Int| public get(): R|kotlin/Int| @@ -58,11 +58,34 @@ FILE: read.kt } + local abstract class Topics_391 : R|kotlin/Any| { + @R|org/jetbrains/kotlinx/dataframe/annotations/Order|(order = Int(0)) public abstract val value: R|kotlin/Double| + public get(): R|kotlin/Double| + + public constructor(): R|/Topics_391| + + } + + local final class Scope1 : R|kotlin/Any| { + public final val R|org/jetbrains/kotlinx/dataframe/DataRow</Topics_391>|.value: R|kotlin/Double| + public get(): R|kotlin/Double| + + public final val R|org/jetbrains/kotlinx/dataframe/ColumnsContainer</Topics_391>|.value: R|org/jetbrains/kotlinx/dataframe/DataColumn| + public get(): R|org/jetbrains/kotlinx/dataframe/DataColumn| + + public constructor(): R|/Scope1| + + } + local abstract class Read_16 : R|/Read_16I| { public abstract var scope0: R|/Scope0| public get(): R|/Scope0| public set(value: R|/Scope0|): R|kotlin/Unit| + public abstract var scope1: R|/Scope1| + public get(): R|/Scope1| + public set(value: R|/Scope1|): R|kotlin/Unit| + public constructor(): R|/Read_16| } diff --git a/plugins/kotlin-dataframe/testData/box/readCSV.fir.ir.txt b/plugins/kotlin-dataframe/testData/box/readCSV.fir.ir.txt index 58295a9e02..758647482e 100644 --- a/plugins/kotlin-dataframe/testData/box/readCSV.fir.ir.txt +++ b/plugins/kotlin-dataframe/testData/box/readCSV.fir.ir.txt @@ -50,7 +50,7 @@ FILE fqName:org.jetbrains.kotlinx.dataframe fileName:/readCSV.kt PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.TokenGenerator.Key] name:topics visibility:public modality:ABSTRACT [val] 
annotations: Order(order = 3) - FUN DEFAULT_PROPERTY_ACCESSOR name: visibility:public modality:ABSTRACT <> ($this:org.jetbrains.kotlinx.dataframe.box..ReadCSV_74I) returnType:kotlin.String + FUN DEFAULT_PROPERTY_ACCESSOR name: visibility:public modality:ABSTRACT <> ($this:org.jetbrains.kotlinx.dataframe.box..ReadCSV_74I) returnType:org.jetbrains.kotlinx.dataframe.DataFrame.Topics_311> correspondingProperty: PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.TokenGenerator.Key] name:topics visibility:public modality:ABSTRACT [val] $this: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.box..ReadCSV_74I PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.TokenGenerator.Key] name:watchers visibility:public modality:ABSTRACT [val] @@ -90,30 +90,30 @@ FILE fqName:org.jetbrains.kotlinx.dataframe fileName:/readCSV.kt $this: GET_VAR ': org.jetbrains.kotlinx.dataframe.ColumnsContainer.ReadCSV_74I> declared in org.jetbrains.kotlinx.dataframe.box..Scope0.' 
type=org.jetbrains.kotlinx.dataframe.ColumnsContainer.ReadCSV_74I> origin=null columnName: CONST String type=kotlin.String value="full_name" PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name:topics visibility:public modality:FINAL [val] - FIELD PROPERTY_BACKING_FIELD name:topics type:kotlin.String visibility:private [final] - FUN GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name: visibility:public modality:FINAL <> ($this:org.jetbrains.kotlinx.dataframe.box..Scope0, $receiver:org.jetbrains.kotlinx.dataframe.DataRow.ReadCSV_74I>) returnType:kotlin.String + FIELD PROPERTY_BACKING_FIELD name:topics type:org.jetbrains.kotlinx.dataframe.DataFrame.Topics_311> visibility:private [final] + FUN GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name: visibility:public modality:FINAL <> ($this:org.jetbrains.kotlinx.dataframe.box..Scope0, $receiver:org.jetbrains.kotlinx.dataframe.DataRow.ReadCSV_74I>) returnType:org.jetbrains.kotlinx.dataframe.DataFrame.Topics_311> annotations: JvmName(name = "ReadCSV_74I_topics") correspondingProperty: PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name:topics visibility:public modality:FINAL [val] $this: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.box..Scope0 $receiver: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.DataRow.ReadCSV_74I> BLOCK_BODY - RETURN type=kotlin.String from='public final fun (): kotlin.String declared in org.jetbrains.kotlinx.dataframe.box..Scope0' - TYPE_OP type=kotlin.String origin=CAST typeOperand=kotlin.String + RETURN type=org.jetbrains.kotlinx.dataframe.DataFrame.Topics_311> from='public final fun (): org.jetbrains.kotlinx.dataframe.DataFrame.Topics_311> declared in org.jetbrains.kotlinx.dataframe.box..Scope0' + TYPE_OP type=org.jetbrains.kotlinx.dataframe.DataFrame.Topics_311> origin=CAST 
typeOperand=org.jetbrains.kotlinx.dataframe.DataFrame.Topics_311> CALL 'public abstract fun get (name: kotlin.String): kotlin.Any? declared in org.jetbrains.kotlinx.dataframe.DataRow' type=kotlin.Any? origin=null $this: GET_VAR ': org.jetbrains.kotlinx.dataframe.DataRow.ReadCSV_74I> declared in org.jetbrains.kotlinx.dataframe.box..Scope0.' type=org.jetbrains.kotlinx.dataframe.DataRow.ReadCSV_74I> origin=null name: CONST String type=kotlin.String value="topics" PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name:topics visibility:public modality:FINAL [val] - FIELD PROPERTY_BACKING_FIELD name:topics type:org.jetbrains.kotlinx.dataframe.DataColumn visibility:private [final] - FUN GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name: visibility:public modality:FINAL <> ($this:org.jetbrains.kotlinx.dataframe.box..Scope0, $receiver:org.jetbrains.kotlinx.dataframe.ColumnsContainer.ReadCSV_74I>) returnType:org.jetbrains.kotlinx.dataframe.DataColumn + FIELD PROPERTY_BACKING_FIELD name:topics type:org.jetbrains.kotlinx.dataframe.DataColumn.Topics_311>> visibility:private [final] + FUN GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name: visibility:public modality:FINAL <> ($this:org.jetbrains.kotlinx.dataframe.box..Scope0, $receiver:org.jetbrains.kotlinx.dataframe.ColumnsContainer.ReadCSV_74I>) returnType:org.jetbrains.kotlinx.dataframe.DataColumn.Topics_311>> annotations: JvmName(name = "ReadCSV_74I_topics") correspondingProperty: PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name:topics visibility:public modality:FINAL [val] $this: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.box..Scope0 $receiver: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.ColumnsContainer.ReadCSV_74I> BLOCK_BODY - RETURN type=org.jetbrains.kotlinx.dataframe.DataColumn from='public final fun (): org.jetbrains.kotlinx.dataframe.DataColumn 
declared in org.jetbrains.kotlinx.dataframe.box..Scope0' - TYPE_OP type=org.jetbrains.kotlinx.dataframe.DataColumn origin=CAST typeOperand=org.jetbrains.kotlinx.dataframe.DataColumn + RETURN type=org.jetbrains.kotlinx.dataframe.DataColumn.Topics_311>> from='public final fun (): org.jetbrains.kotlinx.dataframe.DataColumn.Topics_311>> declared in org.jetbrains.kotlinx.dataframe.box..Scope0' + TYPE_OP type=org.jetbrains.kotlinx.dataframe.DataColumn.Topics_311>> origin=CAST typeOperand=org.jetbrains.kotlinx.dataframe.DataColumn.Topics_311>> CALL 'public open fun get (columnName: kotlin.String): org.jetbrains.kotlinx.dataframe.DataColumn<*> declared in org.jetbrains.kotlinx.dataframe.ColumnsContainer' type=kotlin.Any? origin=null $this: GET_VAR ': org.jetbrains.kotlinx.dataframe.ColumnsContainer.ReadCSV_74I> declared in org.jetbrains.kotlinx.dataframe.box..Scope0.' type=org.jetbrains.kotlinx.dataframe.ColumnsContainer.ReadCSV_74I> origin=null columnName: CONST String type=kotlin.String value="topics" @@ -218,6 +218,78 @@ FILE fqName:org.jetbrains.kotlinx.dataframe fileName:/readCSV.kt overridden: public open fun toString (): kotlin.String declared in kotlin.Any $this: VALUE_PARAMETER name: type:kotlin.Any + CLASS CLASS name:Topics_311 modality:ABSTRACT visibility:local superTypes:[kotlin.Any] + $this: VALUE_PARAMETER INSTANCE_RECEIVER name: type:org.jetbrains.kotlinx.dataframe.box..Topics_311 + CONSTRUCTOR GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] visibility:public <> () returnType:org.jetbrains.kotlinx.dataframe.box..Topics_311 [primary] + BLOCK_BODY + DELEGATING_CONSTRUCTOR_CALL 'public constructor () declared in kotlin.Any' + INSTANCE_INITIALIZER_CALL classDescriptor='CLASS CLASS name:Topics_311 modality:ABSTRACT visibility:local superTypes:[kotlin.Any]' + FUN FAKE_OVERRIDE name:equals visibility:public modality:OPEN <> ($this:kotlin.Any, other:kotlin.Any?) 
returnType:kotlin.Boolean [fake_override,operator] + overridden: + public open fun equals (other: kotlin.Any?): kotlin.Boolean declared in kotlin.Any + $this: VALUE_PARAMETER name: type:kotlin.Any + VALUE_PARAMETER name:other index:0 type:kotlin.Any? + FUN FAKE_OVERRIDE name:hashCode visibility:public modality:OPEN <> ($this:kotlin.Any) returnType:kotlin.Int [fake_override] + overridden: + public open fun hashCode (): kotlin.Int declared in kotlin.Any + $this: VALUE_PARAMETER name: type:kotlin.Any + FUN FAKE_OVERRIDE name:toString visibility:public modality:OPEN <> ($this:kotlin.Any) returnType:kotlin.String [fake_override] + overridden: + public open fun toString (): kotlin.String declared in kotlin.Any + $this: VALUE_PARAMETER name: type:kotlin.Any + PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.TokenGenerator.Key] name:value visibility:public modality:ABSTRACT [val] + annotations: + Order(order = 0) + FUN DEFAULT_PROPERTY_ACCESSOR name: visibility:public modality:ABSTRACT <> ($this:org.jetbrains.kotlinx.dataframe.box..Topics_311) returnType:kotlin.Double + correspondingProperty: PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.TokenGenerator.Key] name:value visibility:public modality:ABSTRACT [val] + $this: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.box..Topics_311 + CLASS CLASS name:Scope1 modality:FINAL visibility:local superTypes:[kotlin.Any] + $this: VALUE_PARAMETER INSTANCE_RECEIVER name: type:org.jetbrains.kotlinx.dataframe.box..Scope1 + PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name:value visibility:public modality:FINAL [val] + FIELD PROPERTY_BACKING_FIELD name:value type:kotlin.Double visibility:private [final] + FUN GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name: visibility:public modality:FINAL <> ($this:org.jetbrains.kotlinx.dataframe.box..Scope1, $receiver:org.jetbrains.kotlinx.dataframe.DataRow.Topics_311>) 
returnType:kotlin.Double + annotations: + JvmName(name = "Topics_311_value") + correspondingProperty: PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name:value visibility:public modality:FINAL [val] + $this: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.box..Scope1 + $receiver: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.DataRow.Topics_311> + BLOCK_BODY + RETURN type=kotlin.Double from='public final fun (): kotlin.Double declared in org.jetbrains.kotlinx.dataframe.box..Scope1' + TYPE_OP type=kotlin.Double origin=CAST typeOperand=kotlin.Double + CALL 'public abstract fun get (name: kotlin.String): kotlin.Any? declared in org.jetbrains.kotlinx.dataframe.DataRow' type=kotlin.Any? origin=null + $this: GET_VAR ': org.jetbrains.kotlinx.dataframe.DataRow.Topics_311> declared in org.jetbrains.kotlinx.dataframe.box..Scope1.' type=org.jetbrains.kotlinx.dataframe.DataRow.Topics_311> origin=null + name: CONST String type=kotlin.String value="value" + PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name:value visibility:public modality:FINAL [val] + FIELD PROPERTY_BACKING_FIELD name:value type:org.jetbrains.kotlinx.dataframe.DataColumn visibility:private [final] + FUN GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name: visibility:public modality:FINAL <> ($this:org.jetbrains.kotlinx.dataframe.box..Scope1, $receiver:org.jetbrains.kotlinx.dataframe.ColumnsContainer.Topics_311>) returnType:org.jetbrains.kotlinx.dataframe.DataColumn + annotations: + JvmName(name = "Topics_311_value") + correspondingProperty: PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] name:value visibility:public modality:FINAL [val] + $this: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.box..Scope1 + $receiver: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.ColumnsContainer.Topics_311> + BLOCK_BODY + RETURN 
type=org.jetbrains.kotlinx.dataframe.DataColumn from='public final fun (): org.jetbrains.kotlinx.dataframe.DataColumn declared in org.jetbrains.kotlinx.dataframe.box..Scope1' + TYPE_OP type=org.jetbrains.kotlinx.dataframe.DataColumn origin=CAST typeOperand=org.jetbrains.kotlinx.dataframe.DataColumn + CALL 'public open fun get (columnName: kotlin.String): org.jetbrains.kotlinx.dataframe.DataColumn<*> declared in org.jetbrains.kotlinx.dataframe.ColumnsContainer' type=kotlin.Any? origin=null + $this: GET_VAR ': org.jetbrains.kotlinx.dataframe.ColumnsContainer.Topics_311> declared in org.jetbrains.kotlinx.dataframe.box..Scope1.' type=org.jetbrains.kotlinx.dataframe.ColumnsContainer.Topics_311> origin=null + columnName: CONST String type=kotlin.String value="value" + CONSTRUCTOR GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] visibility:public <> () returnType:org.jetbrains.kotlinx.dataframe.box..Scope1 [primary] + BLOCK_BODY + DELEGATING_CONSTRUCTOR_CALL 'public constructor () declared in kotlin.Any' + INSTANCE_INITIALIZER_CALL classDescriptor='CLASS CLASS name:Scope1 modality:FINAL visibility:local superTypes:[kotlin.Any]' + FUN FAKE_OVERRIDE name:equals visibility:public modality:OPEN <> ($this:kotlin.Any, other:kotlin.Any?) returnType:kotlin.Boolean [fake_override,operator] + overridden: + public open fun equals (other: kotlin.Any?): kotlin.Boolean declared in kotlin.Any + $this: VALUE_PARAMETER name: type:kotlin.Any + VALUE_PARAMETER name:other index:0 type:kotlin.Any? 
+ FUN FAKE_OVERRIDE name:hashCode visibility:public modality:OPEN <> ($this:kotlin.Any) returnType:kotlin.Int [fake_override] + overridden: + public open fun hashCode (): kotlin.Int declared in kotlin.Any + $this: VALUE_PARAMETER name: type:kotlin.Any + FUN FAKE_OVERRIDE name:toString visibility:public modality:OPEN <> ($this:kotlin.Any) returnType:kotlin.String [fake_override] + overridden: + public open fun toString (): kotlin.String declared in kotlin.Any + $this: VALUE_PARAMETER name: type:kotlin.Any CLASS CLASS name:ReadCSV_74 modality:ABSTRACT visibility:local superTypes:[org.jetbrains.kotlinx.dataframe.box..ReadCSV_74I] $this: VALUE_PARAMETER INSTANCE_RECEIVER name: type:org.jetbrains.kotlinx.dataframe.box..ReadCSV_74 CONSTRUCTOR GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.DataFramePlugin] visibility:public <> () returnType:org.jetbrains.kotlinx.dataframe.box..ReadCSV_74 [primary] @@ -271,11 +343,11 @@ FILE fqName:org.jetbrains.kotlinx.dataframe fileName:/readCSV.kt annotations: Order(order = 3) overridden: - public abstract topics: kotlin.String declared in org.jetbrains.kotlinx.dataframe.box..ReadCSV_74I - FUN FAKE_OVERRIDE name: visibility:public modality:ABSTRACT <> ($this:org.jetbrains.kotlinx.dataframe.box..ReadCSV_74I) returnType:kotlin.String [fake_override] + public abstract topics: org.jetbrains.kotlinx.dataframe.DataFrame.Topics_311> declared in org.jetbrains.kotlinx.dataframe.box..ReadCSV_74I + FUN FAKE_OVERRIDE name: visibility:public modality:ABSTRACT <> ($this:org.jetbrains.kotlinx.dataframe.box..ReadCSV_74I) returnType:org.jetbrains.kotlinx.dataframe.DataFrame.Topics_311> [fake_override] correspondingProperty: PROPERTY FAKE_OVERRIDE name:topics visibility:public modality:ABSTRACT [fake_override,val] overridden: - public abstract fun (): kotlin.String declared in org.jetbrains.kotlinx.dataframe.box..ReadCSV_74I + public abstract fun (): org.jetbrains.kotlinx.dataframe.DataFrame.Topics_311> declared in 
org.jetbrains.kotlinx.dataframe.box..ReadCSV_74I $this: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.box..ReadCSV_74I PROPERTY FAKE_OVERRIDE name:watchers visibility:public modality:ABSTRACT [fake_override,val] annotations: @@ -295,6 +367,14 @@ FILE fqName:org.jetbrains.kotlinx.dataframe fileName:/readCSV.kt correspondingProperty: PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.TokenGenerator.Key] name:scope0 visibility:public modality:ABSTRACT [var] $this: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.box..ReadCSV_74 VALUE_PARAMETER name: index:0 type:org.jetbrains.kotlinx.dataframe.box..Scope0 + PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.TokenGenerator.Key] name:scope1 visibility:public modality:ABSTRACT [var] + FUN DEFAULT_PROPERTY_ACCESSOR name: visibility:public modality:ABSTRACT <> ($this:org.jetbrains.kotlinx.dataframe.box..ReadCSV_74) returnType:org.jetbrains.kotlinx.dataframe.box..Scope1 + correspondingProperty: PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.TokenGenerator.Key] name:scope1 visibility:public modality:ABSTRACT [var] + $this: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.box..ReadCSV_74 + FUN DEFAULT_PROPERTY_ACCESSOR name: visibility:public modality:ABSTRACT <> ($this:org.jetbrains.kotlinx.dataframe.box..ReadCSV_74, :org.jetbrains.kotlinx.dataframe.box..Scope1) returnType:kotlin.Unit + correspondingProperty: PROPERTY GENERATED[org.jetbrains.kotlinx.dataframe.plugin.extensions.TokenGenerator.Key] name:scope1 visibility:public modality:ABSTRACT [var] + $this: VALUE_PARAMETER name: type:org.jetbrains.kotlinx.dataframe.box..ReadCSV_74 + VALUE_PARAMETER name: index:0 type:org.jetbrains.kotlinx.dataframe.box..Scope1 RETURN type=kotlin.Nothing from='local final fun (it: org.jetbrains.kotlinx.dataframe.DataFrame.Companion): org.jetbrains.kotlinx.dataframe.DataFrame.ReadCSV_74> declared in org.jetbrains.kotlinx.dataframe.box' CALL 'public 
final fun readCSV (fileOrUrl: kotlin.String, delimiter: kotlin.Char, header: kotlin.collections.List, colTypes: kotlin.collections.Map, skipLines: kotlin.Int, readLines: kotlin.Int?, duplicate: kotlin.Boolean, charset: java.nio.charset.Charset, parserOptions: org.jetbrains.kotlinx.dataframe.api.ParserOptions?): org.jetbrains.kotlinx.dataframe.DataFrame<*> declared in org.jetbrains.kotlinx.dataframe.io' type=org.jetbrains.kotlinx.dataframe.DataFrame.ReadCSV_74> origin=null $receiver: GET_VAR 'it: org.jetbrains.kotlinx.dataframe.DataFrame.Companion declared in org.jetbrains.kotlinx.dataframe.box.' type=org.jetbrains.kotlinx.dataframe.DataFrame.Companion origin=null diff --git a/plugins/kotlin-dataframe/testData/box/readCSV.fir.txt b/plugins/kotlin-dataframe/testData/box/readCSV.fir.txt index daf183e80a..b5b2789aad 100644 --- a/plugins/kotlin-dataframe/testData/box/readCSV.fir.txt +++ b/plugins/kotlin-dataframe/testData/box/readCSV.fir.txt @@ -7,8 +7,8 @@ FILE: readCSV.kt @R|org/jetbrains/kotlinx/dataframe/annotations/Order|(order = Int(0)) public abstract val full_name: R|kotlin/String| public get(): R|kotlin/String| - @R|org/jetbrains/kotlinx/dataframe/annotations/Order|(order = Int(3)) public abstract val topics: R|kotlin/String| - public get(): R|kotlin/String| + @R|org/jetbrains/kotlinx/dataframe/annotations/Order|(order = Int(3)) public abstract val topics: R|org/jetbrains/kotlinx/dataframe/DataFrame</Topics_311>| + public get(): R|org/jetbrains/kotlinx/dataframe/DataFrame</Topics_311>| @R|org/jetbrains/kotlinx/dataframe/annotations/Order|(order = Int(4)) public abstract val watchers: R|kotlin/Int| public get(): R|kotlin/Int| @@ -30,11 +30,11 @@ FILE: readCSV.kt public final val R|org/jetbrains/kotlinx/dataframe/ColumnsContainer</ReadCSV_74I>|.full_name: R|org/jetbrains/kotlinx/dataframe/DataColumn| public get(): R|org/jetbrains/kotlinx/dataframe/DataColumn| - public final val R|org/jetbrains/kotlinx/dataframe/DataRow</ReadCSV_74I>|.topics: R|kotlin/String| - 
public get(): R|kotlin/String| + public final val R|org/jetbrains/kotlinx/dataframe/DataRow</ReadCSV_74I>|.topics: R|org/jetbrains/kotlinx/dataframe/DataFrame</Topics_311>| + public get(): R|org/jetbrains/kotlinx/dataframe/DataFrame</Topics_311>| - public final val R|org/jetbrains/kotlinx/dataframe/ColumnsContainer</ReadCSV_74I>|.topics: R|org/jetbrains/kotlinx/dataframe/DataColumn| - public get(): R|org/jetbrains/kotlinx/dataframe/DataColumn| + public final val R|org/jetbrains/kotlinx/dataframe/ColumnsContainer</ReadCSV_74I>|.topics: R|org/jetbrains/kotlinx/dataframe/DataColumn/Topics_311>>| + public get(): R|org/jetbrains/kotlinx/dataframe/DataColumn/Topics_311>>| public final val R|org/jetbrains/kotlinx/dataframe/DataRow</ReadCSV_74I>|.watchers: R|kotlin/Int| public get(): R|kotlin/Int| @@ -58,11 +58,34 @@ FILE: readCSV.kt } + local abstract class Topics_311 : R|kotlin/Any| { + @R|org/jetbrains/kotlinx/dataframe/annotations/Order|(order = Int(0)) public abstract val value: R|kotlin/Double| + public get(): R|kotlin/Double| + + public constructor(): R|/Topics_311| + + } + + local final class Scope1 : R|kotlin/Any| { + public final val R|org/jetbrains/kotlinx/dataframe/DataRow</Topics_311>|.value: R|kotlin/Double| + public get(): R|kotlin/Double| + + public final val R|org/jetbrains/kotlinx/dataframe/ColumnsContainer</Topics_311>|.value: R|org/jetbrains/kotlinx/dataframe/DataColumn| + public get(): R|org/jetbrains/kotlinx/dataframe/DataColumn| + + public constructor(): R|/Scope1| + + } + local abstract class ReadCSV_74 : R|/ReadCSV_74I| { public abstract var scope0: R|/Scope0| public get(): R|/Scope0| public set(value: R|/Scope0|): R|kotlin/Unit| + public abstract var scope1: R|/Scope1| + public get(): R|/Scope1| + public set(value: R|/Scope1|): R|kotlin/Unit| + public constructor(): R|/ReadCSV_74| } diff --git a/plugins/kotlin-dataframe/testData/box/toDataFrame.fir.ir.txt b/plugins/kotlin-dataframe/testData/box/toDataFrame.fir.ir.txt index a717a2399e..3f23a6ac6d 
100644 --- a/plugins/kotlin-dataframe/testData/box/toDataFrame.fir.ir.txt +++ b/plugins/kotlin-dataframe/testData/box/toDataFrame.fir.ir.txt @@ -259,6 +259,10 @@ FILE fqName: fileName:/toDataFrame.kt public final fun compareTo (other: E of kotlin.Enum): kotlin.Int declared in kotlin.Enum $this: VALUE_PARAMETER name: type:kotlin.Enum<.Switch> VALUE_PARAMETER name:other index:0 type:.Switch + FUN FAKE_OVERRIDE name:describeConstable visibility:public modality:FINAL <> ($this:kotlin.Enum<.Switch>) returnType:@[FlexibleNullability] java.util.Optional<@[FlexibleNullability] java.lang.Enum.EnumDesc<@[FlexibleNullability] .Switch?>?>? [fake_override] + overridden: + public final fun describeConstable (): @[FlexibleNullability] java.util.Optional<@[FlexibleNullability] java.lang.Enum.EnumDesc<@[FlexibleNullability] E of kotlin.Enum?>?>? declared in kotlin.Enum + $this: VALUE_PARAMETER name: type:kotlin.Enum<.Switch> FUN FAKE_OVERRIDE name:equals visibility:public modality:FINAL <> ($this:kotlin.Enum<.Switch>, other:kotlin.Any?) 
returnType:kotlin.Boolean [fake_override,operator] overridden: public final fun equals (other: kotlin.Any?): kotlin.Boolean declared in kotlin.Enum diff --git a/plugins/kotlin-dataframe/testData/box/toDataFrame_dsl.fir.ir.txt b/plugins/kotlin-dataframe/testData/box/toDataFrame_dsl.fir.ir.txt index 44111f08cf..3c60b419e4 100644 --- a/plugins/kotlin-dataframe/testData/box/toDataFrame_dsl.fir.ir.txt +++ b/plugins/kotlin-dataframe/testData/box/toDataFrame_dsl.fir.ir.txt @@ -321,6 +321,10 @@ FILE fqName: fileName:/toDataFrame_dsl.kt public final fun compareTo (other: E of kotlin.Enum): kotlin.Int declared in kotlin.Enum $this: VALUE_PARAMETER name: type:kotlin.Enum<.Switch> VALUE_PARAMETER name:other index:0 type:.Switch + FUN FAKE_OVERRIDE name:describeConstable visibility:public modality:FINAL <> ($this:kotlin.Enum<.Switch>) returnType:@[FlexibleNullability] java.util.Optional<@[FlexibleNullability] java.lang.Enum.EnumDesc<@[FlexibleNullability] .Switch?>?>? [fake_override] + overridden: + public final fun describeConstable (): @[FlexibleNullability] java.util.Optional<@[FlexibleNullability] java.lang.Enum.EnumDesc<@[FlexibleNullability] E of kotlin.Enum?>?>? declared in kotlin.Enum + $this: VALUE_PARAMETER name: type:kotlin.Enum<.Switch> FUN FAKE_OVERRIDE name:equals visibility:public modality:FINAL <> ($this:kotlin.Enum<.Switch>, other:kotlin.Any?) 
returnType:kotlin.Boolean [fake_override,operator] overridden: public final fun equals (other: kotlin.Any?): kotlin.Boolean declared in kotlin.Enum From f3770cf0097c2fb8030c540c2252a9c9099a66fb Mon Sep 17 00:00:00 2001 From: devcrocod Date: Fri, 14 Jun 2024 13:19:34 +0200 Subject: [PATCH 09/10] Add generated sources --- .../kotlinx/dataframe/impl/io/readJson.kt | 131 ++++++---- .../kotlinx/dataframe/impl/io/writeJson.kt | 238 ++++++++++-------- .../jetbrains/kotlinx/dataframe/io/guess.kt | 14 +- .../jetbrains/kotlinx/dataframe/io/json.kt | 77 +++--- .../dataframe/jupyter/JupyterHtmlRenderer.kt | 24 +- .../org/jetbrains/kotlinx/dataframe/Utils.kt | 10 +- .../dataframe/io/ImageSerializationTests.kt | 19 +- .../jetbrains/kotlinx/dataframe/io/json.kt | 92 +++---- .../dataframe/jupyter/RenderingTests.kt | 53 ++-- 9 files changed, 373 insertions(+), 285 deletions(-) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt index f1053cda81..a61b23cc2a 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt @@ -1,7 +1,20 @@ package org.jetbrains.kotlinx.dataframe.impl.io -import com.beust.klaxon.JsonArray -import com.beust.klaxon.JsonObject +import kotlinx.serialization.json.JsonArray +import kotlinx.serialization.json.JsonNull +import kotlinx.serialization.json.JsonObject +import kotlinx.serialization.json.JsonPrimitive +import kotlinx.serialization.json.boolean +import kotlinx.serialization.json.booleanOrNull +import kotlinx.serialization.json.double +import kotlinx.serialization.json.doubleOrNull +import kotlinx.serialization.json.float +import kotlinx.serialization.json.floatOrNull +import kotlinx.serialization.json.int +import kotlinx.serialization.json.intOrNull +import 
kotlinx.serialization.json.jsonArray +import kotlinx.serialization.json.long +import kotlinx.serialization.json.longOrNull import org.jetbrains.kotlinx.dataframe.AnyCol import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.DataColumn @@ -73,8 +86,8 @@ internal fun readJson( val df: AnyFrame = when (typeClashTactic) { ARRAY_AND_VALUE_COLUMNS -> { when (parsed) { - is JsonArray<*> -> fromJsonListArrayAndValueColumns( - records = parsed.value, + is JsonArray -> fromJsonListArrayAndValueColumns( + records = parsed, header = header, keyValuePaths = keyValuePaths, ) @@ -88,8 +101,8 @@ internal fun readJson( ANY_COLUMNS -> { when (parsed) { - is JsonArray<*> -> fromJsonListAnyColumns( - records = parsed.value, + is JsonArray -> fromJsonListAnyColumns( + records = parsed, header = header, keyValuePaths = keyValuePaths, ) @@ -126,18 +139,16 @@ internal fun fromJsonListAnyColumns( // list element type can be JsonObject, JsonArray or primitive val nameGenerator = ColumnNameGenerator() - records.forEach { - when (it) { + records.forEach { record -> + when (record) { is JsonObject -> { hasObject = true - it.entries.forEach { - nameGenerator.addIfAbsent(it.key) - } + record.entries.forEach { nameGenerator.addIfAbsent(it.key) } } - is JsonArray<*> -> hasArray = true - null -> Unit - else -> hasPrimitive = true + is JsonArray -> hasArray = true + is JsonNull, null -> Unit + is JsonPrimitive -> hasPrimitive = true } } @@ -177,7 +188,7 @@ internal fun fromJsonListAnyColumns( ) } - is JsonArray<*> -> { + is JsonArray -> { val parsed = fromJsonListAnyColumns( records = v, keyValuePaths = keyValuePaths, @@ -189,9 +200,22 @@ internal fun fromJsonListAnyColumns( ) } - "NaN" -> { - nanIndices.add(i) - collector.add(null) + is JsonNull -> collector.add(null) + + is JsonPrimitive -> { + when { + v.content == "NaN" -> { + nanIndices.add(i) + collector.add(null) + } + + v.isString -> collector.add(v.content) + v.booleanOrNull != null -> 
collector.add(v.boolean) + v.intOrNull != null -> collector.add(v.int) + v.longOrNull != null -> collector.add(v.long) + v.doubleOrNull != null -> collector.add(v.double) + v.floatOrNull != null -> collector.add(v.float) + } } else -> collector.add(v) @@ -227,8 +251,8 @@ internal fun fromJsonListAnyColumns( records.forEach { startIndices.add(values.size) when (it) { - is JsonArray<*> -> values.addAll(it.value) - null -> Unit + is JsonArray -> values.addAll(it) + is JsonNull, null -> Unit else -> error("Expected JsonArray, got $it") } } @@ -242,10 +266,10 @@ internal fun fromJsonListAnyColumns( parsed.isSingleUnnamedColumn() -> { val col = (parsed.getColumn(0) as UnnamedColumn).col val elementType = col.type - val values = col.values.asList().splitByIndices(startIndices.asSequence()).toList() + val columnValues = col.values.asList().splitByIndices(startIndices.asSequence()).toList() DataColumn.createValueColumn( name = arrayColumnName, - values = values, + values = columnValues, type = List::class.createType(listOf(KTypeProjection.invariant(elementType))), ) } @@ -263,10 +287,10 @@ internal fun fromJsonListAnyColumns( colType == AnyColType.OBJECTS && isKeyValue -> { // collect the value types to make sure Value columns with lists and other values aren't all turned into lists val valueTypes = mutableSetOf() - val dataFrames = records.map { - when (it) { + val dataFrames = records.map { record -> + when (record) { is JsonObject -> { - val map = it.map.mapValues { (key, value) -> + val map = record.mapValues { (key, value) -> val parsed = fromJsonListAnyColumns( records = listOf(value), keyValuePaths = keyValuePaths, @@ -288,8 +312,8 @@ internal fun fromJsonListAnyColumns( ) } - null -> DataFrame.emptyOf() - else -> error("Expected JsonObject, got $it") + is JsonNull, null -> DataFrame.emptyOf() + else -> error("Expected JsonObject, got $record") } } @@ -328,7 +352,7 @@ internal fun fromJsonListAnyColumns( records.forEach { when (it) { is JsonObject -> 
values.add(it[colName]) - null -> values.add(null) + is JsonNull, null -> values.add(null) else -> error("Expected JsonObject, got $it") } } @@ -401,18 +425,18 @@ internal fun fromJsonListArrayAndValueColumns( // { "array": [], "value": 123, "a": null, "b": null } val nameGenerator = ColumnNameGenerator() - records.forEach { - when (it) { - is JsonObject -> it.entries.forEach { + records.forEach { record -> + when (record) { + is JsonObject -> record.entries.forEach { nameGenerator.addIfAbsent(it.key) } - is JsonArray<*> -> hasArray = true - null -> Unit - else -> hasPrimitive = true + is JsonArray -> hasArray = true + is JsonNull, null -> Unit + is JsonPrimitive -> hasPrimitive = true } } - if (records.all { it == null }) hasPrimitive = true + if (records.all { it == null || it is JsonNull }) hasPrimitive = true // Add a value column to the collected names if needed val valueColumn = if (hasPrimitive || records.isEmpty()) { @@ -433,10 +457,10 @@ internal fun fromJsonListArrayAndValueColumns( val columns: List = when { // instead of using the names, generate a single key/value frame column isKeyValue -> { - val dataFrames = records.map { - when (it) { + val dataFrames = records.map { record -> + when (record) { is JsonObject -> { - val map = it.map.mapValues { (key, value) -> + val map = record.mapValues { (key, value) -> val parsed = fromJsonListArrayAndValueColumns( records = listOf(value), keyValuePaths = keyValuePaths, @@ -459,8 +483,8 @@ internal fun fromJsonListArrayAndValueColumns( ) } - null -> DataFrame.emptyOf() - else -> error("Expected JsonObject, got $it") + is JsonNull, null -> DataFrame.emptyOf() + else -> error("Expected JsonObject, got $record") } } @@ -488,10 +512,22 @@ internal fun fromJsonListArrayAndValueColumns( records.forEachIndexed { i, v -> when (v) { is JsonObject -> collector.add(null) - is JsonArray<*> -> collector.add(null) - "NaN" -> { - nanIndices.add(i) - collector.add(null) + is JsonArray -> collector.add(null) + is JsonNull -> 
collector.add(null) + is JsonPrimitive -> { + when { + v.content == "NaN" -> { + nanIndices.add(i) + collector.add(null) + } + + v.isString -> collector.add(v.content) + v.booleanOrNull != null -> collector.add(v.boolean) + v.intOrNull != null -> collector.add(v.int) + v.longOrNull != null -> collector.add(v.long) + v.doubleOrNull != null -> collector.add(v.double) + v.floatOrNull != null -> collector.add(v.float) + } } else -> collector.add(v) @@ -526,7 +562,7 @@ internal fun fromJsonListArrayAndValueColumns( val startIndices = ArrayList() records.forEach { startIndices.add(values.size) - if (it is JsonArray<*>) values.addAll(it.value) + if (it is JsonArray) values.addAll(it.jsonArray) } val parsed = fromJsonListArrayAndValueColumns( records = values, @@ -538,10 +574,11 @@ internal fun fromJsonListArrayAndValueColumns( parsed.isSingleUnnamedColumn() -> { val col = (parsed.getColumn(0) as UnnamedColumn).col val elementType = col.type - val values = col.values.asList().splitByIndices(startIndices.asSequence()).toList() + val columnValues = + col.values.asList().splitByIndices(startIndices.asSequence()).toList() DataColumn.createValueColumn( name = colName, - values = values, + values = columnValues, type = List::class.createType(listOf(KTypeProjection.invariant(elementType))), ) } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt index 2bff506bcd..174cbbfaf2 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt @@ -1,8 +1,18 @@ +@file:OptIn(ExperimentalSerializationApi::class) + package org.jetbrains.kotlinx.dataframe.impl.io -import com.beust.klaxon.JsonArray -import com.beust.klaxon.JsonObject -import com.beust.klaxon.KlaxonJson +import 
kotlinx.serialization.ExperimentalSerializationApi +import kotlinx.serialization.json.Json +import kotlinx.serialization.json.JsonArray +import kotlinx.serialization.json.JsonElement +import kotlinx.serialization.json.JsonObject +import kotlinx.serialization.json.JsonPrimitive +import kotlinx.serialization.json.buildJsonArray +import kotlinx.serialization.json.buildJsonObject +import kotlinx.serialization.json.encodeToJsonElement +import kotlinx.serialization.json.putJsonArray +import kotlinx.serialization.json.putJsonObject import org.jetbrains.kotlinx.dataframe.AnyCol import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.ColumnsContainer @@ -36,17 +46,9 @@ import org.jetbrains.kotlinx.dataframe.typeClass import java.awt.image.BufferedImage import java.io.IOException -internal fun KlaxonJson.encodeRow(frame: ColumnsContainer<*>, index: Int): JsonObject? { - val values = frame.columns().map { col -> - when (col) { - is ColumnGroup<*> -> encodeRow(col, index) - is FrameColumn<*> -> encodeFrame(col[index]) - else -> encodeValue(col, index) - }.let { col.name to it } - } - if (values.isEmpty()) return null - return obj(values) -} +// See docs/serialization_format.md for a description of +// serialization versions and format. +internal const val SERIALIZATION_VERSION = "2.1.0" internal object SerializationKeys { const val DATA = "data" @@ -61,31 +63,70 @@ internal object SerializationKeys { const val TYPES = "types" } -// See docs/serialization_format.md for a description of -// serialization versions and format. 
-internal const val SERIALIZATION_VERSION = "2.1.0" +private val valueTypes = + setOf(Boolean::class, Double::class, Int::class, Float::class, Long::class, Short::class, Byte::class) + +@OptIn(ExperimentalSerializationApi::class) +private fun convert(value: Any?): JsonElement = when (value) { + is JsonElement -> value + is Number -> JsonPrimitive(value) + is String -> JsonPrimitive(value) + is Char -> JsonPrimitive(value.toString()) + is Boolean -> JsonPrimitive(value) + null -> JsonPrimitive(null) + else -> JsonPrimitive(value.toString()) +} + +internal fun encodeRow(frame: ColumnsContainer<*>, index: Int): JsonObject { + val values: Map = frame.columns().associate { col -> + col.name to when { + col is ColumnGroup<*> -> encodeRow(col, index) + col is FrameColumn<*> -> encodeFrame(col[index]) + col.isList() -> { + col[index]?.let { + JsonArray((it as List<*>).map { value -> convert(value) }) + } ?: JsonPrimitive(null) + } + + col.typeClass in valueTypes -> { + val v = col[index] + convert(v) + } + + else -> JsonPrimitive(col[index]?.toString()) + } + } + + if (values.isEmpty()) return buildJsonObject { } + return JsonObject(values) +} -internal fun KlaxonJson.encodeRowWithMetadata( +internal fun encodeRowWithMetadata( frame: ColumnsContainer<*>, index: Int, rowLimit: Int? = null, imageEncodingOptions: Base64ImageEncodingOptions? = null -): JsonObject? { - val values = frame.columns().map { col -> +): JsonElement? 
{ + val values: List> = frame.columns().map { col -> when (col) { is ColumnGroup<*> -> { val schema = col.schema() - obj( - DATA to encodeRowWithMetadata(col, index, rowLimit, imageEncodingOptions), - METADATA to obj( - KIND to ColumnKind.Group.toString(), - COLUMNS to schema.columns.keys, - TYPES to schema.columns.values.map { columnSchema -> - createJsonTypeDescriptor(columnSchema) + buildJsonObject { + put(DATA, encodeRowWithMetadata(col, index, rowLimit, imageEncodingOptions) ?: JsonPrimitive(null)) + putJsonObject(METADATA) { + put(KIND, JsonPrimitive(ColumnKind.Group.toString())) + put(COLUMNS, Json.encodeToJsonElement(schema.columns.keys)) + putJsonArray(TYPES) { + addAll( + schema.columns.values.map { columnSchema -> + createJsonTypeDescriptor(columnSchema) + } + ) } - ), - ) + } + } } + is FrameColumn<*> -> { val data = if (rowLimit == null) { encodeFrameWithMetadata(col[index], null, imageEncodingOptions) @@ -93,59 +134,49 @@ internal fun KlaxonJson.encodeRowWithMetadata( encodeFrameWithMetadata(col[index].take(rowLimit), rowLimit, imageEncodingOptions) } val schema = col.schema.value - obj( - DATA to data, - METADATA to obj( - KIND to ColumnKind.Frame.toString(), - COLUMNS to schema.columns.keys, - TYPES to schema.columns.values.map { columnSchema -> - createJsonTypeDescriptor(columnSchema) - }, - NCOL to col[index].ncol, - NROW to col[index].nrow - ) - ) + buildJsonObject { + put(DATA, data) + putJsonObject(METADATA) { + put(KIND, JsonPrimitive(ColumnKind.Frame.toString())) + put(COLUMNS, Json.encodeToJsonElement(schema.columns.keys)) + putJsonArray(TYPES) { + addAll( + schema.columns.values.map { columnSchema -> + createJsonTypeDescriptor(columnSchema) + } + ) + } + put(NCOL, JsonPrimitive(col[index].ncol)) + put(NROW, JsonPrimitive(col[index].nrow)) + } + } } + else -> encodeValue(col, index, imageEncodingOptions) }.let { col.name to it } } if (values.isEmpty()) return null - return obj(values) + return JsonObject(values.toMap()) } -private val 
valueTypes = - setOf(Boolean::class, Double::class, Int::class, Float::class, Long::class, Short::class, Byte::class) - -internal fun KlaxonJson.encodeValue( +internal fun encodeValue( col: AnyCol, index: Int, imageEncodingOptions: Base64ImageEncodingOptions? = null -): Any? = when { +): JsonElement = when { col.isList() -> col[index]?.let { list -> - val values = (list as List<*>).map { - when (it) { - null, is Int, is Double, is Float, is Long, is Boolean, is Short, is Byte -> it - // Klaxon default serializers will try to use reflection and can sometimes fail. - // We can't have exceptions in Notebook DataFrame renderer - else -> it.toString() - } - } - array(values) - } ?: array() - - col.typeClass in valueTypes -> { - val v = col[index] - if ((v is Double && v.isNaN()) || (v is Float && v.isNaN())) { - v.toString() - } else v - } + val values = (list as List<*>).map { convert(it) } + JsonArray(values) + } ?: JsonArray(emptyList()) + + col.typeClass in valueTypes -> convert(col[index]) col.typeClass == BufferedImage::class && imageEncodingOptions != null -> col[index]?.let { image -> - encodeBufferedImageAsBase64(image as BufferedImage, imageEncodingOptions) - } ?: "" + JsonPrimitive(encodeBufferedImageAsBase64(image as BufferedImage, imageEncodingOptions)) + } ?: JsonPrimitive("") - else -> col[index]?.toString() + else -> JsonPrimitive(col[index]?.toString()) } private fun encodeBufferedImageAsBase64( @@ -173,19 +204,19 @@ private fun encodeBufferedImageAsBase64( private fun createJsonTypeDescriptor(columnSchema: ColumnSchema): JsonObject { return JsonObject( - mutableMapOf(KIND to columnSchema.kind.toString()).also { + mutableMapOf(KIND to JsonPrimitive(columnSchema.kind.toString())).also { if (columnSchema.kind == ColumnKind.Value) { - it.put(TYPE, columnSchema.type.toString()) + it[TYPE] = JsonPrimitive(columnSchema.type.toString()) } } ) } -internal fun KlaxonJson.encodeFrameWithMetadata( +internal fun encodeFrameWithMetadata( frame: AnyFrame, rowLimit: 
Int? = null, imageEncodingOptions: Base64ImageEncodingOptions? = null -): JsonArray<*> { +): JsonArray { val valueColumn = frame.extractValueColumn() val arrayColumn = frame.extractArrayColumn() @@ -205,7 +236,7 @@ internal fun KlaxonJson.encodeFrameWithMetadata( ?: encodeRowWithMetadata(frame, rowIndex, rowLimit, imageEncodingOptions) } - return array(data) + return buildJsonArray { addAll(data.map { convert(it) }) } } internal fun AnyFrame.extractValueColumn(): DataColumn<*>? { @@ -232,9 +263,9 @@ internal fun AnyFrame.extractValueColumn(): DataColumn<*>? { } } -// if there is only 1 column, then `isValidValueColumn` always true. -// But at the same time, we shouldn't treat dataFrameOf("value")(1,2,3) like unnamed column -// because it was created by user. +// If there is only 1 column, then `isValidValueColumn` always true. +// But at the same time, we shouldn't treat dataFrameOf("value")(1,2,3) like an unnamed column +// because it was created by the user. internal val AnyFrame.isPossibleToFindUnnamedColumns: Boolean get() = columns().size != 1 @@ -261,45 +292,50 @@ internal fun AnyFrame.extractArrayColumn(): DataColumn<*>? 
{ } } -internal fun KlaxonJson.encodeFrame(frame: AnyFrame): JsonArray<*> { +internal fun encodeFrame(frame: AnyFrame): JsonArray { val valueColumn = frame.extractValueColumn() val arrayColumn = frame.extractArrayColumn() val arraysAreFrames = arrayColumn?.kind() == ColumnKind.Frame val data = frame.indices().map { rowIndex -> - valueColumn - ?.get(rowIndex) - ?: arrayColumn?.get(rowIndex) - ?.let { - if (arraysAreFrames) encodeFrame(it as AnyFrame) else null - } - ?: encodeRow(frame, rowIndex) + when { + valueColumn != null -> valueColumn[rowIndex] + arrayColumn != null -> arrayColumn[rowIndex]?.let { if (arraysAreFrames) encodeFrame(it as AnyFrame) else null } + else -> encodeRow(frame, rowIndex) + } } - return array(data) + return buildJsonArray { addAll(data.map { convert(it) }) } } -internal fun KlaxonJson.encodeDataFrameWithMetadata( +internal fun encodeDataFrameWithMetadata( frame: AnyFrame, rowLimit: Int, nestedRowLimit: Int? = null, imageEncodingOptions: Base64ImageEncodingOptions? 
= null ): JsonObject { - return obj( - VERSION to SERIALIZATION_VERSION, - METADATA to obj( - COLUMNS to frame.columnNames(), - TYPES to frame.schema().columns.values.map { colSchema -> - createJsonTypeDescriptor(colSchema) - }, - NROW to frame.rowsCount(), - NCOL to frame.columnsCount() - ), - KOTLIN_DATAFRAME to encodeFrameWithMetadata( - frame.take(rowLimit), - rowLimit = nestedRowLimit, - imageEncodingOptions - ), - ) + return buildJsonObject { + put(VERSION, JsonPrimitive(SERIALIZATION_VERSION)) + putJsonObject(METADATA) { + putJsonArray(COLUMNS) { addAll(frame.columnNames().map { JsonPrimitive(it) }) } + putJsonArray(TYPES) { + addAll( + frame.schema().columns.values.map { colSchema -> + createJsonTypeDescriptor(colSchema) + } + ) + } + put(NROW, JsonPrimitive(frame.rowsCount())) + put(NCOL, JsonPrimitive(frame.columnsCount())) + } + put( + KOTLIN_DATAFRAME, + encodeFrameWithMetadata( + frame.take(rowLimit), + rowLimit = nestedRowLimit, + imageEncodingOptions + ) + ) + } } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess.kt index 3fa4dba46f..428254dcc2 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess.kt @@ -208,21 +208,15 @@ internal fun DataFrame.Companion.read( formats: List = supportedFormats.filterIsInstance(), ): ReadAnyFrame { if (format != null) return format to format.readDataFrame(stream, header = header) - val input = NotCloseableStream(if (stream.markSupported()) stream else BufferedInputStream(stream)) - try { - val readLimit = 10000 - input.mark(readLimit) - + stream.use { input -> + val byteArray = input.readBytes() // read 8192 bytes formats.sortedBy { it.testOrder }.forEach { try { - input.reset() - return it to it.readDataFrame(input, header = header) - } catch (e: Exception) { + 
return it to it.readDataFrame(byteArray.inputStream(), header = header) + } catch (_: Exception) { } } throw IllegalArgumentException("Unknown stream format; Tried $formats") - } finally { - input.doClose() } } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index b1737012a6..331fc5f34d 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -1,7 +1,9 @@ package org.jetbrains.kotlinx.dataframe.io -import com.beust.klaxon.Parser -import com.beust.klaxon.json +import kotlinx.serialization.ExperimentalSerializationApi +import kotlinx.serialization.json.Json +import kotlinx.serialization.json.JsonElement +import kotlinx.serialization.json.decodeFromStream import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.AnyRow import org.jetbrains.kotlinx.dataframe.DataFrame @@ -20,6 +22,8 @@ import org.jetbrains.kotlinx.dataframe.impl.io.encodeDataFrameWithMetadata import org.jetbrains.kotlinx.dataframe.impl.io.encodeFrame import org.jetbrains.kotlinx.dataframe.impl.io.encodeRow import org.jetbrains.kotlinx.dataframe.impl.io.readJson +import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.GZIP_ON +import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.LIMIT_SIZE_ON import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS @@ -110,8 +114,8 @@ public class JSON( } } -public const val arrayColumnName: String = "array" -public const val valueColumnName: String = "value" +internal const val arrayColumnName: String = "array" +internal const val valueColumnName: String = "value" /** * 
@param file Where to fetch the Json as [InputStream] to be converted to a [DataFrame]. @@ -213,12 +217,13 @@ public fun DataRow.Companion.readJson( * @param header Optional list of column names. If given, [stream] will be read like an object with [header] being the keys. * @return [DataFrame] from the given [stream]. */ +@OptIn(ExperimentalSerializationApi::class) public fun DataFrame.Companion.readJson( stream: InputStream, header: List = emptyList(), keyValuePaths: List = emptyList(), typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyFrame = readJson(Parser.default().parse(stream), header, keyValuePaths, typeClashTactic) +): AnyFrame = readJson(Json.decodeFromStream(stream), header, keyValuePaths, typeClashTactic) /** * @param stream Json as [InputStream] to be converted to a [DataRow]. @@ -250,7 +255,7 @@ public fun DataFrame.Companion.readJsonStr( header: List = emptyList(), keyValuePaths: List = emptyList(), typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyFrame = readJson(Parser.default().parse(StringBuilder(text)), header, keyValuePaths, typeClashTactic) +): AnyFrame = readJson(Json.parseToJsonElement(text), header, keyValuePaths, typeClashTactic) /** * @param text Json as [String] to be converted to a [DataRow]. 
@@ -267,22 +272,24 @@ public fun DataRow.Companion.readJsonStr( typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, ): AnyRow = DataFrame.readJsonStr(text, header, keyValuePaths, typeClashTactic).single() -public fun AnyFrame.toJson(prettyPrint: Boolean = false, canonical: Boolean = false): String { - return json { - encodeFrame(this@toJson) - }.toJsonString(prettyPrint, canonical) +public fun AnyFrame.toJson(prettyPrint: Boolean = false): String { + val json = Json { + this.prettyPrint = prettyPrint + isLenient = true + allowSpecialFloatingPointValues = true + } + return json.encodeToString(JsonElement.serializer(), encodeFrame(this@toJson)) } /** * Converts the DataFrame to a JSON string representation with additional metadata about serialized data. - * It is heavily used to implement some integration features in Kotlin Notebook IntellJ IDEA plugin. + * It is heavily used to implement some integration features in Kotlin Notebook IntelliJ IDEA plugin. * * @param rowLimit The maximum number of top-level dataframe rows to include in the output JSON. * @param nestedRowLimit The maximum number of nested frame rows to include in the output JSON. * If null, all rows are included. * Applied for each frame column recursively * @param prettyPrint Specifies whether the output JSON should be formatted with indentation and line breaks. - * @param canonical Specifies whether the output JSON should be in a canonical form. * @param imageEncodingOptions The options for encoding images. The default is null, which indicates that the image is not encoded as Base64. * * @return The DataFrame converted to a JSON string with metadata. @@ -291,12 +298,17 @@ public fun AnyFrame.toJsonWithMetadata( rowLimit: Int, nestedRowLimit: Int? = null, prettyPrint: Boolean = false, - canonical: Boolean = false, imageEncodingOptions: Base64ImageEncodingOptions? 
= null ): String { - return json { + val json = Json { + this.prettyPrint = prettyPrint + isLenient = true + allowSpecialFloatingPointValues = true + } + return json.encodeToString( + JsonElement.serializer(), encodeDataFrameWithMetadata(this@toJsonWithMetadata, rowLimit, nestedRowLimit, imageEncodingOptions) - }.toJsonString(prettyPrint, canonical) + ) } internal const val DEFAULT_IMG_SIZE = 600 @@ -324,31 +336,34 @@ public class Base64ImageEncodingOptions( } } -public fun AnyRow.toJson(prettyPrint: Boolean = false, canonical: Boolean = false): String { - return json { - encodeRow(df(), index()) - }?.toJsonString(prettyPrint, canonical) ?: "" +public fun AnyRow.toJson(prettyPrint: Boolean = false): String { + val json = Json { + this.prettyPrint = prettyPrint + isLenient = true + allowSpecialFloatingPointValues = true + } + return json.encodeToString(JsonElement.serializer(), encodeRow(df(), index())) } -public fun AnyFrame.writeJson(file: File, prettyPrint: Boolean = false, canonical: Boolean = false) { - file.writeText(toJson(prettyPrint, canonical)) +public fun AnyFrame.writeJson(file: File, prettyPrint: Boolean = false) { + file.writeText(toJson(prettyPrint)) } -public fun AnyFrame.writeJson(path: String, prettyPrint: Boolean = false, canonical: Boolean = false): Unit = - writeJson(File(path), prettyPrint, canonical) +public fun AnyFrame.writeJson(path: String, prettyPrint: Boolean = false): Unit = + writeJson(File(path), prettyPrint) -public fun AnyFrame.writeJson(writer: Appendable, prettyPrint: Boolean = false, canonical: Boolean = false) { - writer.append(toJson(prettyPrint, canonical)) +public fun AnyFrame.writeJson(writer: Appendable, prettyPrint: Boolean = false) { + writer.append(toJson(prettyPrint)) } -public fun AnyRow.writeJson(file: File, prettyPrint: Boolean = false, canonical: Boolean = false) { - file.writeText(toJson(prettyPrint, canonical)) +public fun AnyRow.writeJson(file: File, prettyPrint: Boolean = false) { + 
file.writeText(toJson(prettyPrint)) } -public fun AnyRow.writeJson(path: String, prettyPrint: Boolean = false, canonical: Boolean = false) { - writeJson(File(path), prettyPrint, canonical) +public fun AnyRow.writeJson(path: String, prettyPrint: Boolean = false) { + writeJson(File(path), prettyPrint) } -public fun AnyRow.writeJson(writer: Appendable, prettyPrint: Boolean = false, canonical: Boolean = false) { - writer.append(toJson(prettyPrint, canonical)) +public fun AnyRow.writeJson(writer: Appendable, prettyPrint: Boolean = false) { + writer.append(toJson(prettyPrint)) } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt index 469dd634fa..4d8e91bffe 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt @@ -1,6 +1,10 @@ package org.jetbrains.kotlinx.dataframe.jupyter -import com.beust.klaxon.json +import kotlinx.serialization.ExperimentalSerializationApi +import kotlinx.serialization.json.addAll +import kotlinx.serialization.json.buildJsonObject +import kotlinx.serialization.json.put +import kotlinx.serialization.json.putJsonArray import org.jetbrains.kotlinx.dataframe.api.take import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.COLUMNS import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME @@ -23,6 +27,7 @@ import org.jetbrains.kotlinx.jupyter.api.MimeTypedResult import org.jetbrains.kotlinx.jupyter.api.Notebook import org.jetbrains.kotlinx.jupyter.api.libraries.JupyterIntegration import org.jetbrains.kotlinx.jupyter.api.mimeResult +import org.jetbrains.kotlinx.jupyter.api.outputs.isIsolatedHtml import org.jetbrains.kotlinx.jupyter.api.renderHtmlAsIFrameIfNeeded /** Starting from this version, 
dataframe integration will respond with additional data for rendering in Kotlin Notebooks plugin. */ @@ -35,6 +40,7 @@ internal class JupyterHtmlRenderer( val builder: JupyterIntegration.Builder, ) +@OptIn(ExperimentalSerializationApi::class) internal inline fun JupyterHtmlRenderer.render( noinline getFooter: (T) -> String, crossinline modifyConfig: T.(DisplayConfiguration) -> DisplayConfiguration = { it }, @@ -72,14 +78,12 @@ internal inline fun JupyterHtmlRenderer.render( // TODO Do we need to handle the improved meta data here as well? val jsonEncodedDf = when { !ideBuildNumber.supportsDynamicNestedTables() -> { - json { - obj( - NROW to df.size.nrow, - NCOL to df.size.ncol, - COLUMNS to df.columnNames(), - KOTLIN_DATAFRAME to encodeFrame(df.take(limit)), - ) - }.toJsonString() + buildJsonObject { + put(NROW, df.size.nrow) + put(NCOL, df.size.ncol) + putJsonArray(COLUMNS) { addAll(df.columnNames()) } + put(KOTLIN_DATAFRAME, encodeFrame(df.take(limit))) + }.toString() } else -> { @@ -121,7 +125,7 @@ internal fun Notebook.renderAsIFrameAsNeeded( return mimeResult( "text/html" to textHtml, "application/kotlindataframe+json" to jsonEncodedDf - ).also { it.isolatedHtml = false } + ).also { it.isIsolatedHtml = false } } internal fun DataFrameHtmlData.toJupyterHtmlData() = HtmlData(style, body, script) diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt index dae5f687ba..9c3f18912f 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt @@ -1,7 +1,8 @@ package org.jetbrains.kotlinx.dataframe -import com.beust.klaxon.JsonObject -import com.beust.klaxon.Parser +import kotlinx.serialization.json.Json +import kotlinx.serialization.json.JsonObject +import kotlinx.serialization.json.jsonObject import 
org.jetbrains.kotlinx.dataframe.api.print import org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.io.renderToString @@ -27,7 +28,4 @@ fun > T.alsoDebug(println: String? = null, rowsLimit: Int = 20) schema().print() } -fun parseJsonStr(jsonStr: String): JsonObject { - val parser = Parser.default() - return parser.parse(StringBuilder(jsonStr)) as JsonObject -} +fun parseJsonStr(jsonStr: String): JsonObject = Json.parseToJsonElement(jsonStr).jsonObject diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt index cae6e759ce..a6d21d708d 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt @@ -1,9 +1,11 @@ package org.jetbrains.kotlinx.dataframe.io -import com.beust.klaxon.JsonArray -import com.beust.klaxon.JsonObject import io.kotest.matchers.shouldBe import io.kotest.matchers.string.shouldContain +import kotlinx.serialization.json.JsonObject +import kotlinx.serialization.json.jsonArray +import kotlinx.serialization.json.jsonObject +import kotlinx.serialization.json.jsonPrimitive import org.jetbrains.kotlinx.dataframe.api.dataFrameOf import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME import org.jetbrains.kotlinx.dataframe.impl.io.resizeKeepingAspectRatio @@ -35,7 +37,7 @@ class ImageSerializationTests(private val encodingOptions: Base64ImageEncodingOp return } - val decodedImages = decodeImagesFromJson(json, images.size, encodingOptions!!) 
+ val decodedImages = decodeImagesFromJson(json, images.size, encodingOptions) for ((decodedImage, original) in decodedImages.zip(images)) { val expectedImage = resizeIfNeeded(original, encodingOptions) @@ -67,8 +69,8 @@ class ImageSerializationTests(private val encodingOptions: Base64ImageEncodingOp private fun checkImagesEncodedAsToString(json: JsonObject, numImgs: Int) { for (i in 0..)[i] as JsonObject - val img = row["imgs"] as String + val row = json[KOTLIN_DATAFRAME]!!.jsonArray[i].jsonObject + val img = row["imgs"]?.jsonPrimitive?.content img shouldContain "BufferedImage" } @@ -81,8 +83,8 @@ class ImageSerializationTests(private val encodingOptions: Base64ImageEncodingOp ): List { val result = mutableListOf() for (i in 0..)[i] as JsonObject - val imgString = row["imgs"] as String + val row = json[KOTLIN_DATAFRAME]!!.jsonArray[i].jsonObject + val imgString = row["imgs"]!!.jsonPrimitive.content val bytes = decodeBase64Image(imgString, encodingOptions) val decodedImage = createImageFromBytes(bytes) @@ -156,7 +158,8 @@ class ImageSerializationTests(private val encodingOptions: Base64ImageEncodingOp private val DEFAULT = Base64ImageEncodingOptions() private val GZIP_ON_RESIZE_OFF = Base64ImageEncodingOptions(options = GZIP_ON) private val GZIP_OFF_RESIZE_OFF = Base64ImageEncodingOptions(options = ALL_OFF) - private val GZIP_ON_RESIZE_TO_700 = Base64ImageEncodingOptions(imageSizeLimit = 700, options = GZIP_ON or LIMIT_SIZE_ON) + private val GZIP_ON_RESIZE_TO_700 = + Base64ImageEncodingOptions(imageSizeLimit = 700, options = GZIP_ON or LIMIT_SIZE_ON) private val DISABLED = null @JvmStatic diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index b139ad44aa..6eb1025018 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ 
b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -1,8 +1,5 @@ package org.jetbrains.kotlinx.dataframe.io -import com.beust.klaxon.JsonArray -import com.beust.klaxon.JsonObject -import com.beust.klaxon.Parser import io.kotest.assertions.throwables.shouldNotThrowAny import io.kotest.matchers.collections.shouldBeIn import io.kotest.matchers.shouldBe @@ -10,6 +7,11 @@ import io.kotest.matchers.string.shouldContain import io.kotest.matchers.string.shouldNotContain import io.kotest.matchers.types.instanceOf import io.kotest.matchers.types.shouldBeInstanceOf +import kotlinx.serialization.json.boolean +import kotlinx.serialization.json.int +import kotlinx.serialization.json.jsonArray +import kotlinx.serialization.json.jsonObject +import kotlinx.serialization.json.jsonPrimitive import org.intellij.lang.annotations.Language import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.DataFrame @@ -24,12 +26,13 @@ import org.jetbrains.kotlinx.dataframe.api.forEach import org.jetbrains.kotlinx.dataframe.api.getColumnGroup import org.jetbrains.kotlinx.dataframe.api.getFrameColumn import org.jetbrains.kotlinx.dataframe.api.schema -import org.jetbrains.kotlinx.dataframe.api.toDouble +import org.jetbrains.kotlinx.dataframe.api.toFloat import org.jetbrains.kotlinx.dataframe.api.toMap import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.columns.ValueColumn +import org.jetbrains.kotlinx.dataframe.get import org.jetbrains.kotlinx.dataframe.impl.io.SERIALIZATION_VERSION import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.COLUMNS import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.DATA @@ -42,6 +45,7 @@ import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.VERSION import org.jetbrains.kotlinx.dataframe.impl.nothingType import 
org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS +import org.jetbrains.kotlinx.dataframe.parseJsonStr import org.jetbrains.kotlinx.dataframe.testJson import org.jetbrains.kotlinx.dataframe.type import org.jetbrains.kotlinx.dataframe.values @@ -399,15 +403,16 @@ class JsonTests { fun `NaN float serialization`() { val df = dataFrameOf("v")(1.1f, Float.NaN) df["v"].type() shouldBe typeOf() - DataFrame.readJsonStr(df.toJson()) shouldBe df.convert("v").toDouble() + val actual = DataFrame.readJsonStr(df.toJson()).convert("v").toFloat() + actual shouldBe df } @Test fun `NaN float serialization Any`() { val df = dataFrameOf("v")(1.1f, Float.NaN) df["v"].type() shouldBe typeOf() - DataFrame.readJsonStr(df.toJson(), typeClashTactic = ANY_COLUMNS) shouldBe df.convert("v") - .toDouble() + val actual = DataFrame.readJsonStr(df.toJson(), typeClashTactic = ANY_COLUMNS).convert("v").toFloat() + actual shouldBe df } @Test @@ -966,11 +971,11 @@ class JsonTests { @Test fun `nulls in columns should be encoded explicitly`() { val df = dataFrameOf("a", "b")("1", null, "2", 12) - df.toJson(canonical = true) shouldContain "\"b\":null" + df.toJson() shouldContain "\"b\":null" +// df.toJson(canonical = true) shouldContain "\"b\":null" } @Test - @Suppress("UNCHECKED_CAST") fun `json with metadata flat table`() { @Language("json") val data = """ @@ -980,24 +985,19 @@ class JsonTests { val jsonStr = df.toJsonWithMetadata(df.rowsCount()).trimIndent() val json = parseJsonStr(jsonStr) - json[VERSION] shouldBe SERIALIZATION_VERSION + json[VERSION]!!.jsonPrimitive.content shouldBe SERIALIZATION_VERSION - val metadata = (json[METADATA] as JsonObject) - metadata[NROW] shouldBe 1 - metadata[NCOL] shouldBe 4 - val columns = metadata[COLUMNS] as List + val metadata = json[METADATA]!!.jsonObject + metadata[NROW]!!.jsonPrimitive.int shouldBe 1 + metadata[NCOL]!!.jsonPrimitive.int shouldBe 4 + val 
columns = metadata[COLUMNS]!!.jsonArray.map { it.jsonPrimitive.content } columns shouldBe listOf("id", "node_id", "name", "full_name") - val decodedData = json[KOTLIN_DATAFRAME] as JsonArray<*> - val decodedDf = DataFrame.readJsonStr(decodedData.toJsonString()) + val decodedData = json[KOTLIN_DATAFRAME]!!.jsonArray + val decodedDf = DataFrame.readJsonStr(decodedData.toString()) decodedDf shouldBe df } - private fun parseJsonStr(jsonStr: String): JsonObject { - val parser = Parser.default() - return parser.parse(StringBuilder(jsonStr)) as JsonObject - } - @Test fun `json with metadata column group`() { @Language("json") @@ -1008,19 +1008,19 @@ class JsonTests { val jsonStr = df.toJsonWithMetadata(df.rowsCount()).trimIndent() val json = parseJsonStr(jsonStr) - val row = (json[KOTLIN_DATAFRAME] as JsonArray<*>)[0] as JsonObject + val row = json[KOTLIN_DATAFRAME]!!.jsonArray[0].jsonObject - val permissions = row["permissions"] as JsonObject - val metadata = permissions[METADATA] as JsonObject - metadata[KIND] shouldBe ColumnKind.Group.toString() + val permissions = row["permissions"]!!.jsonObject + val metadata = permissions[METADATA]!!.jsonObject + metadata[KIND]!!.jsonPrimitive.content shouldBe ColumnKind.Group.toString() - val decodedData = permissions[DATA] as JsonObject + val decodedData = permissions[DATA]!!.jsonObject - decodedData["admin"] shouldBe false - decodedData["maintain"] shouldBe false - decodedData["push"] shouldBe false - decodedData["triage"] shouldBe false - decodedData["pull"] shouldBe true + decodedData["admin"]!!.jsonPrimitive.boolean shouldBe false + decodedData["maintain"]!!.jsonPrimitive.boolean shouldBe false + decodedData["push"]!!.jsonPrimitive.boolean shouldBe false + decodedData["triage"]!!.jsonPrimitive.boolean shouldBe false + decodedData["pull"]!!.jsonPrimitive.boolean shouldBe true } @Test @@ -1028,19 +1028,19 @@ class JsonTests { val df = DataFrame.readJson(testJson("repositories")) val jsonStr = 
df.toJsonWithMetadata(df.rowsCount()).trimIndent() val json = parseJsonStr(jsonStr) - val row = (json[KOTLIN_DATAFRAME] as JsonArray<*>)[0] as JsonObject + val row = json[KOTLIN_DATAFRAME]!!.jsonArray[0].jsonObject - val contributors = row["contributors"] as JsonObject + val contributors = row["contributors"]!!.jsonObject - val metadata = contributors[METADATA] as JsonObject - metadata[KIND] shouldBe ColumnKind.Frame.toString() - metadata[NCOL] shouldBe 8 - metadata[NROW] shouldBe 29 + val metadata = contributors[METADATA]!!.jsonObject + metadata[KIND]!!.jsonPrimitive.content shouldBe ColumnKind.Frame.toString() + metadata[NCOL]!!.jsonPrimitive.int shouldBe 8 + metadata[NROW]!!.jsonPrimitive.int shouldBe 29 - val decodedData = contributors[DATA] as JsonArray<*> + val decodedData = contributors[DATA]!!.jsonArray decodedData.size shouldBe 29 - val decodedDf = DataFrame.readJsonStr(decodedData.toJsonString()) + val decodedDf = DataFrame.readJsonStr(decodedData.toString()) decodedDf shouldBe df[0]["contributors"] as AnyFrame } @@ -1050,16 +1050,16 @@ class JsonTests { val nestedFrameRowLimit = 20 val jsonStr = df.toJsonWithMetadata(df.rowsCount(), nestedFrameRowLimit).trimIndent() val json = parseJsonStr(jsonStr) - val row = (json[KOTLIN_DATAFRAME] as JsonArray<*>)[0] as JsonObject + val row = json[KOTLIN_DATAFRAME]!!.jsonArray[0].jsonObject - val contributors = row["contributors"] as JsonObject + val contributors = row["contributors"]!!.jsonObject - val metadata = contributors[METADATA] as JsonObject - metadata[KIND] shouldBe ColumnKind.Frame.toString() - metadata[NCOL] shouldBe 8 - metadata[NROW] shouldBe 29 + val metadata = contributors[METADATA]!!.jsonObject + metadata[KIND]!!.jsonPrimitive.content shouldBe ColumnKind.Frame.toString() + metadata[NCOL]!!.jsonPrimitive.int shouldBe 8 + metadata[NROW]!!.jsonPrimitive.int shouldBe 29 - val decodedData = contributors[DATA] as JsonArray<*> + val decodedData = contributors[DATA]!!.jsonArray decodedData.size shouldBe 
nestedFrameRowLimit } diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt index 28e4575ed6..f544c349b0 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt @@ -1,15 +1,18 @@ package org.jetbrains.kotlinx.dataframe.jupyter -import com.beust.klaxon.JsonArray -import com.beust.klaxon.JsonObject -import com.beust.klaxon.Parser import io.kotest.assertions.throwables.shouldNotThrow -import io.kotest.matchers.collections.shouldContain import io.kotest.matchers.comparables.shouldBeGreaterThan import io.kotest.matchers.comparables.shouldBeLessThan import io.kotest.matchers.shouldBe import io.kotest.matchers.string.shouldContain import io.kotest.matchers.string.shouldNotContain +import kotlinx.serialization.json.Json +import kotlinx.serialization.json.JsonArray +import kotlinx.serialization.json.JsonObject +import kotlinx.serialization.json.int +import kotlinx.serialization.json.jsonArray +import kotlinx.serialization.json.jsonObject +import kotlinx.serialization.json.jsonPrimitive import org.intellij.lang.annotations.Language import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.DATA import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME @@ -99,9 +102,9 @@ class RenderingTests : JupyterReplTestCase() { assertDataFrameDimensions(json, 30, 1) - val rows = json.array>(KOTLIN_DATAFRAME)!! 
- rows.getObj(0).int("id") shouldBe 21 - rows.getObj(rows.lastIndex).int("id") shouldBe 50 + val rows = json[KOTLIN_DATAFRAME]!!.jsonArray + rows.getObj(0)["id"]?.jsonPrimitive?.int shouldBe 21 + rows.getObj(rows.lastIndex)["id"]?.jsonPrimitive?.int shouldBe 50 } /** @@ -116,16 +119,15 @@ class RenderingTests : JupyterReplTestCase() { } private fun assertDataFrameDimensions(json: JsonObject, expectedRows: Int, expectedColumns: Int) { - json.obj(METADATA)!!.int("nrow") shouldBe expectedRows - json.obj(METADATA)!!.int("ncol") shouldBe expectedColumns + json[METADATA]!!.jsonObject["nrow"]!!.jsonPrimitive.int shouldBe expectedRows + json[METADATA]!!.jsonObject["ncol"]!!.jsonPrimitive.int shouldBe expectedColumns } private fun parseDataframeJson(result: MimeTypedResult): JsonObject { - val parser = Parser.default() - return parser.parse(StringBuilder(result["application/kotlindataframe+json"]!!)) as JsonObject + return Json.decodeFromString(result["application/kotlindataframe+json"]!!) } - private fun JsonArray<*>.getObj(index: Int) = this[index] as JsonObject + private fun JsonArray.getObj(index: Int) = this[index].jsonObject @Test fun `test kotlin notebook plugin utils sort by one column asc`() { @@ -143,10 +145,10 @@ class RenderingTests : JupyterReplTestCase() { @Suppress("UNCHECKED_CAST") private fun assertSortedById(json: JsonObject, desc: Boolean) { - val rows = json[KOTLIN_DATAFRAME] as JsonArray + val rows = json[KOTLIN_DATAFRAME]!!.jsonArray as List var previousId = if (desc) 101 else 0 - rows.forEach { row -> - val currentId = row.int("id")!! 
+ rows.forEach { row: JsonObject -> + val currentId = row["id"]!!.jsonPrimitive.int if (desc) currentId shouldBeLessThan previousId else currentId shouldBeGreaterThan previousId previousId = currentId } @@ -182,25 +184,25 @@ class RenderingTests : JupyterReplTestCase() { assertDataFrameDimensions(json, 100, 2) - val rows = json[KOTLIN_DATAFRAME] as JsonArray + val rows = json[KOTLIN_DATAFRAME]!!.jsonArray as List assertSortedByCategory(rows) assertSortedById(rows) } - private fun assertSortedByCategory(rows: JsonArray) { + private fun assertSortedByCategory(rows: List) { rows.forEachIndexed { i, row -> - val currentCategory = row.string("category") + val currentCategory = row["category"]!!.jsonPrimitive.content if (i < 50) currentCategory shouldBe "odd" else currentCategory shouldBe "even" } } - private fun assertSortedById(rows: JsonArray) { + private fun assertSortedById(rows: List) { var previousCategory = "odd" var previousId = 0 for (row in rows) { - val currentCategory = row.string("category")!! - val currentId = row.int("id")!! + val currentCategory = row["category"]!!.jsonPrimitive.content + val currentId = row["id"]!!.jsonPrimitive.int if (previousCategory == "odd" && currentCategory == "even") { previousId shouldBeGreaterThan currentId @@ -226,7 +228,6 @@ class RenderingTests : JupyterReplTestCase() { df.group(col1, col2).into("group") """.trimIndent() ) - val jsonOutput = json.toJsonString(prettyPrint = true) val expectedOutput = """ { "${'$'}version": "2.1.0", @@ -360,7 +361,7 @@ class RenderingTests : JupyterReplTestCase() { }] } """.trimIndent() - jsonOutput shouldBe expectedOutput + json shouldBe Json.parseToJsonElement(expectedOutput) } @Test @@ -375,9 +376,9 @@ class RenderingTests : JupyterReplTestCase() { assertDataFrameDimensions(json, 2, 2) - val rows = json.array>(KOTLIN_DATAFRAME)!! 
- (rows.getObj(0).obj("group1")!![DATA] as JsonArray<*>).size shouldBe 10 - (rows.getObj(1).obj("group1")!![DATA] as JsonArray<*>).size shouldBe 10 + val rows = json[KOTLIN_DATAFRAME]!!.jsonArray + rows.getObj(0)["group1"]!!.jsonObject[DATA]!!.jsonArray.size shouldBe 10 + rows.getObj(1)["group1"]!!.jsonObject[DATA]!!.jsonArray.size shouldBe 10 } // Regression KTNB-424 From c0ba4112be067a91e4b83ad78a6085f30987787b Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Fri, 14 Jun 2024 18:17:48 +0200 Subject: [PATCH 10/10] Fixed compiler plugin tests for java 11 --- plugins/kotlin-dataframe/testData/box/toDataFrame.fir.ir.txt | 4 ---- .../kotlin-dataframe/testData/box/toDataFrame_dsl.fir.ir.txt | 4 ---- 2 files changed, 8 deletions(-) diff --git a/plugins/kotlin-dataframe/testData/box/toDataFrame.fir.ir.txt b/plugins/kotlin-dataframe/testData/box/toDataFrame.fir.ir.txt index 3f23a6ac6d..a717a2399e 100644 --- a/plugins/kotlin-dataframe/testData/box/toDataFrame.fir.ir.txt +++ b/plugins/kotlin-dataframe/testData/box/toDataFrame.fir.ir.txt @@ -259,10 +259,6 @@ FILE fqName: fileName:/toDataFrame.kt public final fun compareTo (other: E of kotlin.Enum): kotlin.Int declared in kotlin.Enum $this: VALUE_PARAMETER name: type:kotlin.Enum<.Switch> VALUE_PARAMETER name:other index:0 type:.Switch - FUN FAKE_OVERRIDE name:describeConstable visibility:public modality:FINAL <> ($this:kotlin.Enum<.Switch>) returnType:@[FlexibleNullability] java.util.Optional<@[FlexibleNullability] java.lang.Enum.EnumDesc<@[FlexibleNullability] .Switch?>?>? [fake_override] - overridden: - public final fun describeConstable (): @[FlexibleNullability] java.util.Optional<@[FlexibleNullability] java.lang.Enum.EnumDesc<@[FlexibleNullability] E of kotlin.Enum?>?>? declared in kotlin.Enum - $this: VALUE_PARAMETER name: type:kotlin.Enum<.Switch> FUN FAKE_OVERRIDE name:equals visibility:public modality:FINAL <> ($this:kotlin.Enum<.Switch>, other:kotlin.Any?) 
returnType:kotlin.Boolean [fake_override,operator] overridden: public final fun equals (other: kotlin.Any?): kotlin.Boolean declared in kotlin.Enum diff --git a/plugins/kotlin-dataframe/testData/box/toDataFrame_dsl.fir.ir.txt b/plugins/kotlin-dataframe/testData/box/toDataFrame_dsl.fir.ir.txt index 3c60b419e4..44111f08cf 100644 --- a/plugins/kotlin-dataframe/testData/box/toDataFrame_dsl.fir.ir.txt +++ b/plugins/kotlin-dataframe/testData/box/toDataFrame_dsl.fir.ir.txt @@ -321,10 +321,6 @@ FILE fqName: fileName:/toDataFrame_dsl.kt public final fun compareTo (other: E of kotlin.Enum): kotlin.Int declared in kotlin.Enum $this: VALUE_PARAMETER name: type:kotlin.Enum<.Switch> VALUE_PARAMETER name:other index:0 type:.Switch - FUN FAKE_OVERRIDE name:describeConstable visibility:public modality:FINAL <> ($this:kotlin.Enum<.Switch>) returnType:@[FlexibleNullability] java.util.Optional<@[FlexibleNullability] java.lang.Enum.EnumDesc<@[FlexibleNullability] .Switch?>?>? [fake_override] - overridden: - public final fun describeConstable (): @[FlexibleNullability] java.util.Optional<@[FlexibleNullability] java.lang.Enum.EnumDesc<@[FlexibleNullability] E of kotlin.Enum?>?>? declared in kotlin.Enum - $this: VALUE_PARAMETER name: type:kotlin.Enum<.Switch> FUN FAKE_OVERRIDE name:equals visibility:public modality:FINAL <> ($this:kotlin.Enum<.Switch>, other:kotlin.Any?) returnType:kotlin.Boolean [fake_override,operator] overridden: public final fun equals (other: kotlin.Any?): kotlin.Boolean declared in kotlin.Enum