Skip to content

Commit

Permalink
changed additionalCsv arguments to a lambda with the builder instance…
Browse files Browse the repository at this point in the history
…. This allows us to configure the builder first and then let users modify it to their heart's desire.
  • Loading branch information
Jolanrensen committed Nov 5, 2024
1 parent 21028f3 commit 16c60b5
Show file tree
Hide file tree
Showing 17 changed files with 107 additions and 95 deletions.
24 changes: 12 additions & 12 deletions dataframe-csv/api/dataframe-csv.api

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package org.jetbrains.kotlinx.dataframe.documentation
import io.deephaven.csv.CsvSpecs
import org.apache.commons.csv.CSVFormat
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.io.AdjustCSVFormat
import org.jetbrains.kotlinx.dataframe.io.AdjustCsvSpecs
import org.jetbrains.kotlinx.dataframe.io.ColType
import org.jetbrains.kotlinx.dataframe.io.Compression
import org.jetbrains.kotlinx.dataframe.io.DEFAULT_PARSER_OPTIONS
Expand Down Expand Up @@ -158,13 +160,12 @@ internal object DelimParams {
const val PARSE_PARALLEL: Boolean = true

/**
* @param additionalCsvSpecs Optional [CsvSpecs]. Default: `null`.
* @param adjustCsvSpecs Optional extra [CsvSpecs] configuration. Default: `{ it }`.
*
* A [CsvSpecs] instance can be supplied to configure additional
* parsing options not covered by the other parameters.
* The (default) values of other parameters will override the values in \[additionalCsvSpecs\].
* Before instantiating the [CsvSpecs], the [CsvSpecs.Builder] will be passed to this lambda,
* after all other parameters have already been applied to it.
* This allows you to configure/overwrite any CSV / TSV parsing options, including those set by other parameters.
*/
val ADDITIONAL_CSV_SPECS: CsvSpecs? = null
val ADJUST_CSV_SPECS: AdjustCsvSpecs = { it }

/** @param includeHeader Whether to include the header in the output. Default: `true`. */
const val INCLUDE_HEADER: Boolean = true
Expand Down Expand Up @@ -200,11 +201,10 @@ internal object DelimParams {
val HEADER_COMMENTS: List<String> = emptyList()

/**
* @param additionalCsvFormat Optional [CSVFormat]. Default: [CSVFormat.DEFAULT].
* @param adjustCsvFormat Optional extra [CSVFormat] configuration. Default: `{ it }`.
*
* A [CSVFormat] instance can be supplied to configure additional CSV / TSV printing options
* not covered by the other parameters. The (default) values of other parameters will override the values in
* [additionalCsvFormat].
* Before instantiating the [CSVFormat], the [CSVFormat.Builder] will be passed to this lambda,
* after all other parameters have already been applied to it.
* This allows you to configure/overwrite any CSV / TSV writing options, including those set by other parameters.
*/
val ADDITIONAL_CSV_FORMAT: CSVFormat = CSVFormat.DEFAULT
val ADJUST_CSV_FORMAT: AdjustCSVFormat = { it }
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.api.parse
import org.jetbrains.kotlinx.dataframe.api.tryParse
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.ADDITIONAL_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.ADJUST_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.ALLOW_MISSING_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.COL_TYPES
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.COMPRESSION
Expand All @@ -45,6 +45,7 @@ import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.READ_LINES
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.SKIP_LINES
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.TRIM_INSIDE_QUOTED
import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
import org.jetbrains.kotlinx.dataframe.io.AdjustCsvSpecs
import org.jetbrains.kotlinx.dataframe.io.ColType
import org.jetbrains.kotlinx.dataframe.io.Compression
import org.jetbrains.kotlinx.dataframe.io.DEFAULT_NULL_STRINGS
Expand Down Expand Up @@ -77,7 +78,7 @@ import kotlin.time.Duration
* @include [IGNORE_SURROUNDING_SPACES]
* @include [TRIM_INSIDE_QUOTED]
* @include [PARSE_PARALLEL]
* @include [ADDITIONAL_CSV_SPECS]
* @include [ADJUST_CSV_SPECS]
*/
internal fun readDelimImpl(
inputStream: InputStream,
Expand All @@ -95,11 +96,10 @@ internal fun readDelimImpl(
ignoreSurroundingSpaces: Boolean,
trimInsideQuoted: Boolean,
parseParallel: Boolean,
additionalCsvSpecs: CsvSpecs?,
adjustCsvSpecs: AdjustCsvSpecs,
): DataFrame<*> {
// set up the csv specs
val csvSpecs = with(CsvSpecs.builder()) {
if (additionalCsvSpecs != null) from(additionalCsvSpecs)
customDoubleParser(DataFrameCustomDoubleParser(parserOptions))
nullValueLiterals(parserOptions.nullStrings ?: DEFAULT_NULL_STRINGS)
headerLegalizer(::legalizeHeader)
Expand All @@ -124,7 +124,9 @@ internal fun readDelimImpl(

// this function must be last, so the return value is used
return@with this.withColTypes(colTypes, useDeepHavenLocalDateTime)
}.build()
}
.let { adjustCsvSpecs(it, it) }
.build()

val csvReaderResult = inputStream.useDecompressed(compression) { decompressedInputStream ->
// read the csv
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@ import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.AnyRow
import org.jetbrains.kotlinx.dataframe.api.forEach
import org.jetbrains.kotlinx.dataframe.documentation.CommonWriteDelimDocs
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.ADDITIONAL_CSV_FORMAT
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.ADJUST_CSV_FORMAT
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.CSV_DELIMITER
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.WRITER_WRITE
import org.jetbrains.kotlinx.dataframe.io.AdjustCSVFormat
import org.jetbrains.kotlinx.dataframe.io.QuoteMode
import org.jetbrains.kotlinx.dataframe.io.toJson
import org.apache.commons.csv.QuoteMode as ApacheQuoteMode
Expand All @@ -19,7 +20,7 @@ import org.apache.commons.csv.QuoteMode as ApacheQuoteMode
* @include [WRITER_WRITE]
* @include [CSV_DELIMITER]
* @include [CommonWriteDelimDocs.CommonWriteParams]
* @include [ADDITIONAL_CSV_FORMAT]
* @include [ADJUST_CSV_FORMAT]
*/
internal fun writeDelimImpl(
df: AnyFrame,
Expand All @@ -32,10 +33,10 @@ internal fun writeDelimImpl(
commentChar: Char?,
headerComments: List<String>,
recordSeparator: String,
additionalCsvFormat: CSVFormat,
adjustCsvFormat: AdjustCSVFormat,
) {
// setup CSV format
val format = with(CSVFormat.Builder.create(additionalCsvFormat)) {
val format = with(CSVFormat.Builder.create(CSVFormat.DEFAULT)) {
setDelimiter(delimiter)
setQuote(quote)
setSkipHeaderRecord(!includeHeader)
Expand All @@ -44,7 +45,8 @@ internal fun writeDelimImpl(
setEscape(escapeChar)
setCommentMarker(commentChar)
setHeaderComments(*headerComments.toTypedArray())
}.build()
}.let { adjustCsvFormat(it, it) }
.build()

// let the format handle the writing, only converting AnyRow and AnyFrame to JSON
format.print(writer).use { printer ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@

package org.jetbrains.kotlinx.dataframe.io

import io.deephaven.csv.CsvSpecs
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.documentation.CommonReadDelimDocs
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.ADDITIONAL_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.ADJUST_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.ALLOW_MISSING_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.COL_TYPES
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.COMPRESSION
Expand Down Expand Up @@ -75,7 +74,7 @@ public fun DataFrame.Companion.readCsv(
ignoreSurroundingSpaces = ignoreSurroundingSpaces,
trimInsideQuoted = trimInsideQuoted,
parseParallel = parseParallel,
additionalCsvSpecs = ADDITIONAL_CSV_SPECS,
adjustCsvSpecs = ADJUST_CSV_SPECS,
)
}

Expand Down Expand Up @@ -123,7 +122,7 @@ public fun DataFrame.Companion.readCsv(
ignoreSurroundingSpaces = ignoreSurroundingSpaces,
trimInsideQuoted = trimInsideQuoted,
parseParallel = parseParallel,
additionalCsvSpecs = ADDITIONAL_CSV_SPECS,
adjustCsvSpecs = ADJUST_CSV_SPECS,
)
}

Expand Down Expand Up @@ -171,20 +170,20 @@ public fun DataFrame.Companion.readCsv(
ignoreSurroundingSpaces = ignoreSurroundingSpaces,
trimInsideQuoted = trimInsideQuoted,
parseParallel = parseParallel,
additionalCsvSpecs = ADDITIONAL_CSV_SPECS,
adjustCsvSpecs = ADJUST_CSV_SPECS,
)
}

/**
* {@comment the only one with additionalCsvSpecs}
* {@comment the only one with adjustCsvSpecs}
* @include [CommonReadDelimDocs.CsvDocs]
* @set [CommonReadDelimDocs.DataTitleArg] InputStream
* @set [CommonReadDelimDocs.DataArg] input stream
* @include [INPUT_STREAM_READ]
* @include [CSV_DELIMITER]
* @include [COMPRESSION]
* @include [CommonReadDelimDocs.CommonReadParams]
* @include [ADDITIONAL_CSV_SPECS]
* @include [ADJUST_CSV_SPECS]
*/
@ExperimentalCsv
public fun DataFrame.Companion.readCsv(
Expand All @@ -203,7 +202,7 @@ public fun DataFrame.Companion.readCsv(
ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
parseParallel: Boolean = PARSE_PARALLEL,
additionalCsvSpecs: CsvSpecs? = ADDITIONAL_CSV_SPECS,
adjustCsvSpecs: AdjustCsvSpecs = ADJUST_CSV_SPECS,
): DataFrame<*> =
readDelimImpl(
inputStream = inputStream,
Expand All @@ -221,5 +220,5 @@ public fun DataFrame.Companion.readCsv(
ignoreSurroundingSpaces = ignoreSurroundingSpaces,
trimInsideQuoted = trimInsideQuoted,
parseParallel = parseParallel,
additionalCsvSpecs = additionalCsvSpecs,
adjustCsvSpecs = adjustCsvSpecs,
)
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.documentation.CommonReadDelimDocs
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.ADDITIONAL_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.ADJUST_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.ALLOW_MISSING_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.COL_TYPES
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.CSV_DELIMITER
Expand Down Expand Up @@ -61,5 +61,5 @@ public fun DataFrame.Companion.readCsvStr(
ignoreSurroundingSpaces = ignoreSurroundingSpaces,
trimInsideQuoted = trimInsideQuoted,
parseParallel = parseParallel,
additionalCsvSpecs = ADDITIONAL_CSV_SPECS,
adjustCsvSpecs = ADJUST_CSV_SPECS,
)
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@

package org.jetbrains.kotlinx.dataframe.io

import io.deephaven.csv.CsvSpecs
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.documentation.CommonReadDelimDocs
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.ADDITIONAL_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.ADJUST_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.ALLOW_MISSING_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.COL_TYPES
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.COMPRESSION
Expand Down Expand Up @@ -81,7 +80,7 @@ public fun DataFrame.Companion.readDelim(
ignoreSurroundingSpaces = ignoreSurroundingSpaces,
trimInsideQuoted = trimInsideQuoted,
parseParallel = parseParallel,
additionalCsvSpecs = ADDITIONAL_CSV_SPECS,
adjustCsvSpecs = ADJUST_CSV_SPECS,
)
}

Expand Down Expand Up @@ -129,7 +128,7 @@ public fun DataFrame.Companion.readDelim(
ignoreSurroundingSpaces = ignoreSurroundingSpaces,
trimInsideQuoted = trimInsideQuoted,
parseParallel = parseParallel,
additionalCsvSpecs = ADDITIONAL_CSV_SPECS,
adjustCsvSpecs = ADJUST_CSV_SPECS,
)
}

Expand Down Expand Up @@ -177,20 +176,20 @@ public fun DataFrame.Companion.readDelim(
ignoreSurroundingSpaces = ignoreSurroundingSpaces,
trimInsideQuoted = trimInsideQuoted,
parseParallel = parseParallel,
additionalCsvSpecs = ADDITIONAL_CSV_SPECS,
adjustCsvSpecs = ADJUST_CSV_SPECS,
)
}

/**
* {@comment the only one with additionalCsvSpecs}
* {@comment the only one with adjustCsvSpecs}
* @include [CommonReadDelimDocs.DelimDocs]
* @set [CommonReadDelimDocs.DataTitleArg] InputStream
* @set [CommonReadDelimDocs.DataArg] input stream
* @include [INPUT_STREAM_READ]
* @include [DELIM_DELIMITER]
* @include [COMPRESSION]
* @include [CommonReadDelimDocs.CommonReadParams]
* @include [ADDITIONAL_CSV_SPECS]
* @include [ADJUST_CSV_SPECS]
*/
@ExperimentalCsv
public fun DataFrame.Companion.readDelim(
Expand All @@ -209,7 +208,7 @@ public fun DataFrame.Companion.readDelim(
ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
parseParallel: Boolean = PARSE_PARALLEL,
additionalCsvSpecs: CsvSpecs? = ADDITIONAL_CSV_SPECS,
adjustCsvSpecs: AdjustCsvSpecs = ADJUST_CSV_SPECS,
): DataFrame<*> =
readDelimImpl(
inputStream = inputStream,
Expand All @@ -227,5 +226,5 @@ public fun DataFrame.Companion.readDelim(
ignoreSurroundingSpaces = ignoreSurroundingSpaces,
trimInsideQuoted = trimInsideQuoted,
parseParallel = parseParallel,
additionalCsvSpecs = additionalCsvSpecs,
adjustCsvSpecs = adjustCsvSpecs,
)
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.documentation.CommonReadDelimDocs
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.ADDITIONAL_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.ADJUST_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.ALLOW_MISSING_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.COL_TYPES
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.DELIM_DELIMITER
Expand Down Expand Up @@ -61,5 +61,5 @@ public fun DataFrame.Companion.readDelimStr(
ignoreSurroundingSpaces = ignoreSurroundingSpaces,
trimInsideQuoted = trimInsideQuoted,
parseParallel = parseParallel,
additionalCsvSpecs = ADDITIONAL_CSV_SPECS,
adjustCsvSpecs = ADJUST_CSV_SPECS,
)
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@

package org.jetbrains.kotlinx.dataframe.io

import io.deephaven.csv.CsvSpecs
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.documentation.CommonReadDelimDocs
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.ADDITIONAL_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.ADJUST_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.ALLOW_MISSING_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.COL_TYPES
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.COMPRESSION
Expand Down Expand Up @@ -75,7 +74,7 @@ public fun DataFrame.Companion.readTsv(
ignoreSurroundingSpaces = ignoreSurroundingSpaces,
trimInsideQuoted = trimInsideQuoted,
parseParallel = parseParallel,
additionalCsvSpecs = ADDITIONAL_CSV_SPECS,
adjustCsvSpecs = ADJUST_CSV_SPECS,
)
}

Expand Down Expand Up @@ -123,7 +122,7 @@ public fun DataFrame.Companion.readTsv(
ignoreSurroundingSpaces = ignoreSurroundingSpaces,
trimInsideQuoted = trimInsideQuoted,
parseParallel = parseParallel,
additionalCsvSpecs = ADDITIONAL_CSV_SPECS,
adjustCsvSpecs = ADJUST_CSV_SPECS,
)
}

Expand Down Expand Up @@ -171,20 +170,20 @@ public fun DataFrame.Companion.readTsv(
ignoreSurroundingSpaces = ignoreSurroundingSpaces,
trimInsideQuoted = trimInsideQuoted,
parseParallel = parseParallel,
additionalCsvSpecs = ADDITIONAL_CSV_SPECS,
adjustCsvSpecs = ADJUST_CSV_SPECS,
)
}

/**
* {@comment the only one with additionalCsvSpecs}
* {@comment the only one with adjustCsvSpecs}
* @include [CommonReadDelimDocs.TsvDocs]
* @set [CommonReadDelimDocs.DataTitleArg] InputStream
* @set [CommonReadDelimDocs.DataArg] input stream
* @include [INPUT_STREAM_READ]
* @include [TSV_DELIMITER]
* @include [COMPRESSION]
* @include [CommonReadDelimDocs.CommonReadParams]
* @include [ADDITIONAL_CSV_SPECS]
* @include [ADJUST_CSV_SPECS]
*/
@ExperimentalCsv
public fun DataFrame.Companion.readTsv(
Expand All @@ -203,7 +202,7 @@ public fun DataFrame.Companion.readTsv(
ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
parseParallel: Boolean = PARSE_PARALLEL,
additionalCsvSpecs: CsvSpecs? = ADDITIONAL_CSV_SPECS,
adjustCsvSpecs: AdjustCsvSpecs = ADJUST_CSV_SPECS,
): DataFrame<*> =
readDelimImpl(
inputStream = inputStream,
Expand All @@ -221,5 +220,5 @@ public fun DataFrame.Companion.readTsv(
ignoreSurroundingSpaces = ignoreSurroundingSpaces,
trimInsideQuoted = trimInsideQuoted,
parseParallel = parseParallel,
additionalCsvSpecs = additionalCsvSpecs,
adjustCsvSpecs = adjustCsvSpecs,
)
Loading

0 comments on commit 16c60b5

Please sign in to comment.