Skip to content

Commit 115033a

Browse files
committed
added some tests to confirm behavior from createColumnGuessingType() from before the PR. Added allColsMakesColGroup argument for createColumnGuessingType() and guessValueType() so the old behavior of createColumn() is now controlled in the same place as all other conversions
1 parent 2535378 commit 115033a

File tree

11 files changed

+260
-73
lines changed

11 files changed

+260
-73
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,12 +142,21 @@ public interface DataColumn<out T> : BaseColumn<T> {
142142
* @param name name of the column
143143
* @param values the values to represent each row in the column
144144
* @param nullable optionally you can specify whether [values] contains nulls, if `null` it is inferred.
145+
* @param allColsMakesColGroup if `true`, then, if all values are non-null same-sized columns,
146+
* a column group will be created instead of a [DataColumn][DataColumn]`<`[AnyCol][AnyCol]`>`.
145147
*/
146148
public fun <T> createWithTypeInference(
147149
name: String,
148150
values: List<T>,
149151
nullable: Boolean? = null,
150-
): DataColumn<T> = createColumnGuessingType(name, values, nullable = nullable)
152+
allColsMakesColGroup: Boolean = false,
153+
): DataColumn<T> =
154+
createColumnGuessingType(
155+
name = name,
156+
values = values,
157+
nullable = nullable,
158+
allColsMakesColGroup = allColsMakesColGroup,
159+
)
151160

152161
/**
153162
* Calls [createColumnGroup], [createFrameColumn], or [createValueColumn] based on

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ import org.jetbrains.kotlinx.dataframe.impl.api.withValuesImpl
2424
import org.jetbrains.kotlinx.dataframe.impl.asList
2525
import org.jetbrains.kotlinx.dataframe.impl.columnName
2626
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnAccessorImpl
27-
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumn
27+
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
2828
import org.jetbrains.kotlinx.dataframe.impl.columns.createComputedColumnReference
2929
import org.jetbrains.kotlinx.dataframe.impl.columns.forceResolve
3030
import org.jetbrains.kotlinx.dataframe.impl.columns.unbox
@@ -223,7 +223,13 @@ public class ColumnDelegate<T>(private val parent: ColumnGroupReference? = null)
223223
// region create DataColumn
224224

225225
public inline fun <reified T> columnOf(vararg values: T): DataColumn<T> =
226-
createColumn(values.asIterable(), typeOf<T>(), true).forceResolve()
226+
createColumnGuessingType(
227+
values = values.asIterable(),
228+
suggestedType = typeOf<T>(),
229+
guessTypeWithSuggestedAsUpperbound = true,
230+
listifyValues = false,
231+
allColsMakesColGroup = true,
232+
).forceResolve()
227233

228234
public fun columnOf(vararg values: AnyBaseCol): DataColumn<AnyRow> = columnOf(values.asIterable()).forceResolve()
229235

@@ -244,7 +250,12 @@ public fun <T> columnOf(frames: Iterable<DataFrame<T>>): FrameColumn<T> =
244250
).forceResolve()
245251

246252
public inline fun <reified T> column(values: Iterable<T>): DataColumn<T> =
247-
createColumn(values, typeOf<T>(), false).forceResolve()
253+
createColumnGuessingType(
254+
values = values,
255+
suggestedType = typeOf<T>(),
256+
guessTypeWithSuggestedAsUpperbound = false,
257+
allColsMakesColGroup = true,
258+
).forceResolve()
248259

249260
// endregion
250261

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22

33
package org.jetbrains.kotlinx.dataframe.impl
44

5+
import org.jetbrains.kotlinx.dataframe.AnyCol
56
import org.jetbrains.kotlinx.dataframe.AnyFrame
67
import org.jetbrains.kotlinx.dataframe.AnyRow
8+
import org.jetbrains.kotlinx.dataframe.DataColumn
79
import org.jetbrains.kotlinx.dataframe.DataFrame
810
import org.jetbrains.kotlinx.dataframe.DataRow
911
import org.jetbrains.kotlinx.dataframe.api.Infer
@@ -391,14 +393,24 @@ internal fun <T> getValuesType(values: List<T>, type: KType, infer: Infer): KTyp
391393
* @param listifyValues if true, then values and nulls will be wrapped in a list if they appear among other lists.
392394
* For example: `[1, null, listOf(1, 2, 3)]` will become `List<Int>` instead of `Any?`
393395
* Note: this parameter is ignored if another [Collection] is present in the values.
396+
* @param allColsMakesRow if true, then, if all values are non-null same-sized columns, we assume
397+
* that a column group should be created instead of a [DataColumn][DataColumn]`<`[AnyCol][AnyCol]`>`,
398+
* so the function will return [DataRow].
394399
*/
395400
@PublishedApi
396-
internal fun guessValueType(values: Sequence<Any?>, upperBound: KType? = null, listifyValues: Boolean = false): KType {
401+
internal fun guessValueType(
402+
values: Sequence<Any?>,
403+
upperBound: KType? = null,
404+
listifyValues: Boolean = false,
405+
allColsMakesRow: Boolean = false,
406+
): KType {
397407
val classes = mutableSetOf<KClass<*>>()
398408
val collectionClasses = mutableSetOf<KClass<out Collection<*>>>()
399409
var hasNulls = false
400410
var hasFrames = false
401411
var hasRows = false
412+
var hasCols = false
413+
val colSizes = mutableListOf<Int>()
402414
var hasList = false
403415
var allListsAreEmpty = true
404416
val classesInCollection = mutableSetOf<KClass<*>>()
@@ -412,6 +424,11 @@ internal fun guessValueType(values: Sequence<Any?>, upperBound: KType? = null, l
412424

413425
is AnyFrame -> hasFrames = true
414426

427+
is AnyCol -> {
428+
hasCols = true
429+
colSizes += it.size()
430+
}
431+
415432
is List<*> -> {
416433
hasList = true
417434
if (it.isNotEmpty()) allListsAreEmpty = false
@@ -449,6 +466,7 @@ internal fun guessValueType(values: Sequence<Any?>, upperBound: KType? = null, l
449466
classes.isNotEmpty() -> {
450467
if (hasRows) classes.add(DataRow::class)
451468
if (hasFrames) classes.add(DataFrame::class)
469+
if (hasCols) classes.add(DataColumn::class)
452470
if (hasList) {
453471
if (listifyValues) {
454472
val typeInLists = classesInCollection.commonType(
@@ -466,15 +484,18 @@ internal fun guessValueType(values: Sequence<Any?>, upperBound: KType? = null, l
466484
return classes.commonType(hasNulls, upperBound)
467485
}
468486

469-
hasNulls && !hasFrames && !hasRows && !hasList -> nothingType(nullable = true)
487+
hasNulls && !hasFrames && !hasRows && !hasList && !hasCols -> nothingType(nullable = true)
470488

471489
(hasFrames && (!hasList || allListsWithRows)) || (!hasFrames && allListsWithRows) ->
472490
DataFrame::class.createStarProjectedType(hasNulls)
473491

474-
hasRows && !hasFrames && !hasList ->
492+
hasRows && !hasFrames && !hasList && !hasCols ->
493+
DataRow::class.createStarProjectedType(false)
494+
495+
allColsMakesRow && hasCols && !hasFrames && !hasList && !hasRows && !hasNulls && colSizes.distinct().size == 1 ->
475496
DataRow::class.createStarProjectedType(false)
476497

477-
collectionClasses.isNotEmpty() && !hasFrames && !hasRows -> {
498+
collectionClasses.isNotEmpty() && !hasFrames && !hasRows && !hasCols -> {
478499
val elementType = upperBound?.let {
479500
if (it.jvmErasure.isSubclassOf(Collection::class)) {
480501
it.projectUpTo(Collection::class).arguments[0].type
@@ -492,7 +513,7 @@ internal fun guessValueType(values: Sequence<Any?>, upperBound: KType? = null, l
492513
).withNullability(hasNulls)
493514
}
494515

495-
hasList && collectionClasses.isEmpty() && !hasFrames && !hasRows -> {
516+
hasList && collectionClasses.isEmpty() && !hasFrames && !hasRows && !hasCols -> {
496517
val elementType = upperBound?.let { if (it.jvmErasure == List::class) it.arguments[0].type else null }
497518
List::class
498519
.createTypeWithArgument(
@@ -507,6 +528,7 @@ internal fun guessValueType(values: Sequence<Any?>, upperBound: KType? = null, l
507528
if (hasRows) classes.add(DataRow::class)
508529
if (hasFrames) classes.add(DataFrame::class)
509530
if (hasList) classes.add(List::class)
531+
if (hasCols) classes.add(DataColumn::class)
510532
if (collectionClasses.isNotEmpty()) classes.addAll(collectionClasses)
511533
return classes.commonType(hasNulls, upperBound)
512534
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,6 @@ internal fun NamedValue.toColumnWithPath() =
2323
name = path.last(),
2424
values = listOf(value),
2525
suggestedType = type,
26-
suggestedTypeIsUpperBound = guessType,
26+
guessTypeWithSuggestedAsUpperbound = guessType,
2727
defaultValue = default,
2828
)

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/concat.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ internal fun <T> concatImpl(name: String, columns: List<DataColumn<T>?>, columnS
7777
name = name,
7878
values = list,
7979
suggestedType = tartypeOf,
80-
suggestedTypeIsUpperBound = guessType,
80+
guessTypeWithSuggestedAsUpperbound = guessType,
8181
defaultValue = defaultValue,
8282
).cast()
8383
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt

Lines changed: 61 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext
2828
import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
2929
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
3030
import org.jetbrains.kotlinx.dataframe.columns.ColumnsResolver
31+
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
3132
import org.jetbrains.kotlinx.dataframe.columns.toColumnsSetOf
3233
import org.jetbrains.kotlinx.dataframe.impl.DataFrameReceiver
3334
import org.jetbrains.kotlinx.dataframe.impl.DataRowImpl
@@ -102,48 +103,6 @@ internal fun <T, R> computeValues(df: DataFrame<T>, expression: AddExpression<T,
102103
return nullable to list
103104
}
104105

105-
@Suppress("UNCHECKED_CAST")
106-
@PublishedApi
107-
internal fun <T> createColumn(values: Iterable<T>, suggestedType: KType, guessType: Boolean = false): DataColumn<T> =
108-
when {
109-
// values is a non-empty list of AnyRows
110-
values.any() && values.all { it is AnyRow } ->
111-
DataColumn.createColumnGroup(
112-
name = "",
113-
df = (values as Iterable<AnyRow>).toDataFrame(),
114-
).asDataColumn().cast()
115-
116-
// values is a non-empty list of DataColumns
117-
values.any() && values.all { it is AnyCol } ->
118-
DataColumn.createColumnGroup(
119-
name = "",
120-
df = (values as Iterable<AnyCol>).toDataFrame(),
121-
).asDataColumn().cast()
122-
123-
// values is a non-empty list of DataFrames and nulls
124-
// (but not just nulls; we cannot assume that should create a FrameColumn)
125-
values.any() && values.all { it is AnyFrame? } && !values.all { it == null } ->
126-
DataColumn.createFrameColumn(
127-
name = "",
128-
groups = values.map { it as? AnyFrame ?: DataFrame.empty() },
129-
).asDataColumn().cast()
130-
131-
guessType ->
132-
createColumnGuessingType(
133-
name = "",
134-
values = values.asList(),
135-
suggestedType = suggestedType,
136-
suggestedTypeIsUpperBound = true,
137-
).cast()
138-
139-
else ->
140-
DataColumn.createValueColumn(
141-
name = "",
142-
values = values.toList(),
143-
type = suggestedType,
144-
)
145-
}
146-
147106
// endregion
148107

149108
// region create Columns
@@ -217,30 +176,82 @@ internal fun Array<out String>.toNumberColumns() = toColumnsSetOf<Number>()
217176

218177
// endregion
219178

179+
/**
180+
* Creates a new column by doing type inference on the given values and
181+
* some conversions to unify the values if necessary.
182+
*
183+
* @param values values to create a column from
184+
* @param suggestedType optional suggested type for values.
185+
* If set to `null` (default) the type will be inferred.
186+
* @param guessTypeWithSuggestedAsUpperbound Only relevant when [suggestedType]` != null`.
187+
* If `true`, type inference will happen with the given [suggestedType] as the supertype.
188+
* @param defaultValue optional default value for the column used when a [ValueColumn] is created.
189+
* @param nullable optional hint for the column nullability, used when a [ValueColumn] is created.
190+
* @param listifyValues if `true`, then values and nulls will be wrapped in a list if they appear among other lists.
191+
* For example: `[1, null, listOf(1, 2, 3)]` will become `[[1], [], [1, 2, 3]]`.
192+
* Note: this parameter is ignored if another [Collection] is present in the values.
193+
* @param allColsMakesColGroup if `true`, then, if all values are non-null same-sized columns,
194+
* a column group will be created instead of a [DataColumn][DataColumn]`<`[AnyCol][AnyCol]`>`.
195+
*/
196+
@PublishedApi
197+
internal fun <T> createColumnGuessingType(
198+
values: Iterable<T>,
199+
suggestedType: KType? = null,
200+
guessTypeWithSuggestedAsUpperbound: Boolean = false,
201+
defaultValue: T? = null,
202+
nullable: Boolean? = null,
203+
listifyValues: Boolean = false,
204+
allColsMakesColGroup: Boolean = false,
205+
): DataColumn<T> =
206+
createColumnGuessingType(
207+
name = "",
208+
values = values,
209+
suggestedType = suggestedType,
210+
guessTypeWithSuggestedAsUpperbound = guessTypeWithSuggestedAsUpperbound,
211+
defaultValue = defaultValue,
212+
nullable = nullable,
213+
listifyValues = listifyValues,
214+
allColsMakesColGroup = allColsMakesColGroup,
215+
)
216+
217+
/**
218+
* @include [createColumnGuessingType]
219+
* @param name name for the column
220+
*/
220221
@PublishedApi
221222
internal fun <T> createColumnGuessingType(
222223
name: String,
223-
values: List<T>,
224+
values: Iterable<T>,
224225
suggestedType: KType? = null,
225-
suggestedTypeIsUpperBound: Boolean = false,
226+
guessTypeWithSuggestedAsUpperbound: Boolean = false,
226227
defaultValue: T? = null,
227228
nullable: Boolean? = null,
229+
listifyValues: Boolean = false,
230+
allColsMakesColGroup: Boolean = false,
228231
): DataColumn<T> {
229-
val detectType = suggestedType == null || suggestedTypeIsUpperBound
232+
val detectType = suggestedType == null || guessTypeWithSuggestedAsUpperbound
230233
val type = if (detectType) {
231234
guessValueType(
232235
values = values.asSequence(),
233236
upperBound = suggestedType,
234-
listifyValues = false,
237+
listifyValues = listifyValues,
238+
allColsMakesRow = allColsMakesColGroup,
235239
)
236240
} else {
237241
suggestedType!!
238242
}
239243

240244
return when (type.classifier!! as KClass<*>) {
241245
DataRow::class -> {
242-
val df = values.map { (it as AnyRow?)?.toDataFrame() ?: DataFrame.empty(1) }.concat()
243-
DataColumn.createColumnGroup(name, df).asDataColumn().cast()
246+
// guessValueType can only return DataRow if all values are AnyRow?
247+
// or all are AnyCol and they all have the same size
248+
if (values.firstOrNull() is AnyCol) {
249+
val df = dataFrameOf(values as Iterable<AnyCol>)
250+
DataColumn.createColumnGroup(name, df)
251+
} else {
252+
val df = values.map { (it as AnyRow?)?.toDataFrame() ?: DataFrame.empty(1) }.concat()
253+
DataColumn.createColumnGroup(name, df)
254+
}.asDataColumn().cast()
244255
}
245256

246257
DataFrame::class -> {
@@ -292,15 +303,15 @@ internal fun <T> createColumnGuessingType(
292303
if (nullable == null) {
293304
DataColumn.createValueColumn(
294305
name = name,
295-
values = values,
306+
values = values.asList(),
296307
type = type,
297308
infer = if (detectType) Infer.None else Infer.Nulls,
298309
defaultValue = defaultValue,
299310
)
300311
} else {
301312
DataColumn.createValueColumn(
302313
name = name,
303-
values = values,
314+
values = values.asList(),
304315
type = type.withNullability(nullable),
305316
defaultValue = defaultValue,
306317
)

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
3939
import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
4040
import org.jetbrains.kotlinx.dataframe.impl.DataCollectorBase
4141
import org.jetbrains.kotlinx.dataframe.impl.asList
42-
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumn
42+
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
4343
import org.jetbrains.kotlinx.dataframe.impl.commonType
4444
import org.jetbrains.kotlinx.dataframe.impl.createDataCollector
4545
import org.jetbrains.kotlinx.dataframe.impl.guessValueType
@@ -336,8 +336,11 @@ internal fun fromJsonListAnyColumns(
336336

337337
dataFrameOf(
338338
columnOf(*map.keys.toTypedArray()).named(KeyValueProperty<*>::key.name),
339-
createColumn(values = map.values, suggestedType = valueType, guessType = false)
340-
.named(KeyValueProperty<*>::value.name),
339+
createColumnGuessingType(
340+
values = map.values,
341+
suggestedType = valueType,
342+
guessTypeWithSuggestedAsUpperbound = false,
343+
).named(KeyValueProperty<*>::value.name),
341344
)
342345
}
343346

@@ -516,10 +519,10 @@ internal fun fromJsonListArrayAndValueColumns(
516519

517520
dataFrameOf(
518521
columnOf(*map.keys.toTypedArray()).named(KeyValueProperty<*>::key.name),
519-
createColumn(
522+
createColumnGuessingType(
520523
values = map.values,
521524
suggestedType = valueType,
522-
guessType = false,
525+
guessTypeWithSuggestedAsUpperbound = false,
523526
).named(KeyValueProperty<*>::value.name),
524527
)
525528
}

0 commit comments

Comments
 (0)