Doc/link to spark functions #138

Merged: 2 commits, Jan 24, 2022
6 changes: 6 additions & 0 deletions core/src/main/scala/doric/syntax/BinaryColumns.scala
@@ -19,6 +19,7 @@ private[syntax] trait BinaryColumns {
* as a 32 character hex string.
*
* @group Binary Type
* @see [[org.apache.spark.sql.functions.md5]]
*/
def md5: StringColumn = column.elem.map(f.md5).toDC

@@ -27,6 +28,7 @@ private[syntax] trait BinaryColumns {
* as a 40 character hex string.
*
* @group Binary Type
* @see [[org.apache.spark.sql.functions.sha1]]
*/
def sha1: StringColumn = column.elem.map(f.sha1).toDC

@@ -36,6 +38,7 @@ private[syntax] trait BinaryColumns {
*
* @throws java.lang.IllegalArgumentException if numBits is not in the permitted values
* @group Binary Type
* @see [[org.apache.spark.sql.functions.sha2]]
*/
def sha2(numBits: Int): StringColumn =
column.elem.map(x => f.sha2(x, numBits)).toDC
@@ -45,6 +48,7 @@ private[syntax] trait BinaryColumns {
* returns the value as a long column.
*
* @group Binary Type
* @see [[org.apache.spark.sql.functions.crc32]]
*/
def crc32: LongColumn = column.elem.map(f.crc32).toDC
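A minimal usage sketch for the checksum helpers above. Hedged: it assumes doric's generic `col[T]` getter, the `DataFrame.select` extension that `import doric._` brings into scope, and an illustrative `payload` column.

```scala
import doric._
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").getOrCreate()
import spark.implicits._

val df = Seq("doric".getBytes("UTF-8")).toDF("payload")

// Each method delegates to the Spark function its @see tag links to.
df.select(
  col[Array[Byte]]("payload").md5,       // 32-character hex string
  col[Array[Byte]]("payload").sha1,      // 40-character hex string
  col[Array[Byte]]("payload").sha2(256), // IllegalArgumentException for bad numBits
  col[Array[Byte]]("payload").crc32      // checksum as a Long
).show()
```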

@@ -53,6 +57,7 @@ private[syntax] trait BinaryColumns {
* This is the reverse of unbase64.
*
* @group Binary Type
* @see [[org.apache.spark.sql.functions.base64]]
*/
def base64: StringColumn = column.elem.map(f.base64).toDC

@@ -62,6 +67,7 @@ private[syntax] trait BinaryColumns {
* If either argument is null, the result will also be null.
*
* @group Binary Type
* @see [[org.apache.spark.sql.functions.decode]]
*/
def decode(charset: StringColumn): StringColumn =
(column.elem, charset.elem)
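A hedged sketch of `base64` and `decode`, reusing the `df` with the binary `payload` column from the sketch above; the `"UTF-8".lit` literal syntax for the charset is an assumption.

```scala
import doric._

// base64 renders the bytes printable; decode interprets them as text in the
// given charset (a null charset, like any null input, yields a null result).
df.select(
  col[Array[Byte]]("payload").base64,
  col[Array[Byte]]("payload").decode("UTF-8".lit)
).show()
```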
2 changes: 2 additions & 0 deletions core/src/main/scala/doric/syntax/BooleanColumns.scala
@@ -67,6 +67,7 @@ private[syntax] trait BooleanColumns {
*
* @throws java.lang.RuntimeException if the condition is false
* @group Boolean Type
* @see [[org.apache.spark.sql.functions.assert_true(c:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.assert_true]]
*/
def assertTrue: NullColumn = column.elem.map(f.assert_true).toDC

@@ -75,6 +76,7 @@ private[syntax] trait BooleanColumns {
*
* @throws java.lang.RuntimeException if the condition is false
* @group Boolean Type
* @see [[org.apache.spark.sql.functions.assert_true(c:org\.apache\.spark\.sql\.Column,e:* org.apache.spark.sql.functions.assert_true]]
*/
def assertTrue(msg: StringColumn): NullColumn =
(column.elem, msg.elem).mapN(f.assert_true).toDC
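A hedged sketch of both `assertTrue` overloads; `colInt`, the `>` comparison, and the `.lit` literals are assumed doric syntax, and the column name is illustrative. `spark` comes from the first sketch.

```scala
import doric._
import spark.implicits._

val people = Seq(21, 45).toDF("age")

// The query fails with java.lang.RuntimeException on the first false row;
// the second overload attaches a custom error message column.
people.select(
  (colInt("age") > 0.lit).assertTrue,
  (colInt("age") > 0.lit).assertTrue("age must be positive".lit)
).show()
```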
6 changes: 6 additions & 0 deletions core/src/main/scala/doric/syntax/CommonColumns.scala
@@ -31,6 +31,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] {
* the DoricColumns to coalesce
* @return
* the first column that is not null, or null if all inputs are null.
* @see [[org.apache.spark.sql.functions.coalesce]]
*/
def coalesce[T](cols: DoricColumn[T]*): DoricColumn[T] =
cols.map(_.elem).toList.sequence.map(f.coalesce(_: _*)).toDC
@@ -39,6 +40,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] {
* Calculates the hash code of given columns, and returns the result as an integer column.
*
* @group All Types
* @see [[org.apache.spark.sql.functions.hash]]
*/
def hash(cols: DoricColumn[_]*): IntegerColumn =
cols.map(_.elem).toList.sequence.map(f.hash(_: _*)).toDC
@@ -48,6 +50,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] {
* variant of the xxHash algorithm, and returns the result as a long column.
*
* @group All Types
* @see [[org.apache.spark.sql.functions.xxhash64]]
*/
def xxhash64(cols: DoricColumn[_]*): LongColumn =
cols.map(_.elem).toList.sequence.map(f.xxhash64(_: _*)).toDC
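A hedged sketch of the three variadic helpers above; `colString` is an assumed typed getter, column names are illustrative, and `spark` comes from the first sketch.

```scala
import doric._
import spark.implicits._

val users = Seq(("Alice", Some("Ali")), ("Bob", None))
  .toDF("name", "nick")

users.select(
  coalesce(colString("nick"), colString("name")), // first non-null value per row
  hash(colString("name"), colString("nick")),     // 32-bit hash as an Integer
  xxhash64(colString("name"), colString("nick"))  // 64-bit xxHash variant as a Long
).show()
```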
@@ -181,6 +184,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] {
* literals to compare to
* @return
* Boolean DoricColumn with the comparison logic.
* @see [[org.apache.spark.sql.Column.isin]]
*/
def isIn(elems: T*): BooleanColumn = column.elem.map(_.isin(elems: _*)).toDC

@@ -189,6 +193,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] {
* @group All Types
* @return
* Boolean DoricColumn
* @see [[org.apache.spark.sql.Column.isNull]]
*/
def isNull: BooleanColumn = column.elem.map(_.isNull).toDC

@@ -197,6 +202,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] {
* @group All Types
* @return
* Boolean DoricColumn
* @see [[org.apache.spark.sql.Column.isNotNull]]
*/
def isNotNull: BooleanColumn = column.elem.map(_.isNotNull).toDC
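A hedged sketch of the row-level predicates above, reusing the `users` DataFrame from the previous sketch:

```scala
import doric._

users.select(
  colString("nick").isIn("Ali", "Bea"), // true only for matching literals
  colString("nick").isNull,             // true for Bob's missing nick
  colString("nick").isNotNull
).show()
```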

50 changes: 36 additions & 14 deletions core/src/main/scala/doric/syntax/DateColumns.scala
@@ -15,6 +15,7 @@ private[syntax] trait DateColumns {
* All calls of current_date within the same query return the same value.
*
* @group Date Type
* @see [[org.apache.spark.sql.functions.current_date]]
*/
def currentDate(): DateColumn = f.current_date().asDoric[Date]

@@ -32,6 +33,7 @@ private[syntax] trait DateColumns {
* Date column after adding months
* @note
* Timestamp columns will be truncated to Date column
* @see [[org.apache.spark.sql.functions.add_months(startDate:org\.apache\.spark\.sql\.Column,numMonths:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.add_months]]
*/
def addMonths(nMonths: IntegerColumn): DateColumn =
(column.elem, nMonths.elem).mapN(f.add_months).toDC
@@ -44,6 +46,7 @@ private[syntax] trait DateColumns {
* @note
* Timestamp columns will be truncated to Date column
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.date_add(start:org\.apache\.spark\.sql\.Column,days:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.date_add]]
*/
def addDays(days: IntegerColumn): DateColumn =
(column.elem, days.elem).mapN(f.date_add).toDC
@@ -59,6 +62,7 @@ private[syntax] trait DateColumns {
* Use specialized functions like 'year' whenever possible as they benefit from a
* specialized implementation.
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.date_format]]
*/
def format(format: StringColumn): StringColumn =
(column.elem, format.elem)
@@ -75,6 +79,7 @@ private[syntax] trait DateColumns {
* @note
* Timestamp columns will be truncated to Date column
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.date_sub(start:org\.apache\.spark\.sql\.Column,days:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.date_sub]]
*/
def subDays(days: IntegerColumn): DateColumn =
(column.elem, days.elem).mapN(f.date_sub).toDC
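A hedged sketch of the date arithmetic above; `colDate` and the `2.lit` / `7.lit` integer literals are assumed doric syntax, and `spark` comes from the first sketch.

```scala
import doric._
import java.sql.Date
import spark.implicits._

val dates = Seq(Date.valueOf("2022-01-24")).toDF("d")

dates.select(
  colDate("d").addMonths(2.lit), // 2022-03-24
  colDate("d").addDays(7.lit),   // 2022-01-31
  colDate("d").subDays(7.lit)    // 2022-01-17
).show()
```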
@@ -85,6 +90,7 @@ private[syntax] trait DateColumns {
* @param dateCol
* A Date or Timestamp column
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.datediff]]
*/
def diff(dateCol: DoricColumn[T]): IntegerColumn =
(column.elem, dateCol.elem)
@@ -95,6 +101,7 @@ private[syntax] trait DateColumns {
* Extracts the day of the month as an integer from a given date.
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.dayofmonth]]
*/
def dayOfMonth: IntegerColumn = column.elem.map(f.dayofmonth).toDC

@@ -103,20 +110,23 @@ private[syntax] trait DateColumns {
* Ranges from 1 for a Sunday through to 7 for a Saturday
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.dayofweek]]
*/
def dayOfWeek: IntegerColumn = column.elem.map(f.dayofweek).toDC

/**
* Extracts the day of the year as an integer from a given date.
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.dayofyear]]
*/
def dayOfYear: IntegerColumn = column.elem.map(f.dayofyear).toDC
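A hedged sketch of the field extractors, reusing the `dates` DataFrame from the previous sketch (2022-01-24 is a Monday):

```scala
import doric._

dates.select(
  colDate("d").dayOfMonth, // 24
  colDate("d").dayOfWeek,  // 2 (1 = Sunday .. 7 = Saturday)
  colDate("d").dayOfYear   // 24
).show()
```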

/**
* Sets the moment to the last day of the same month.
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.last_day]]
*/
def endOfMonth: DateColumn = lastDayOfMonth

@@ -126,13 +136,15 @@
* month in July 2015.
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.last_day]]
*/
def lastDayOfMonth: DateColumn = column.elem.map(f.last_day).toDC

/**
* Extracts the month as an integer from a given date.
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.month]]
*/
def month: IntegerColumn = column.elem.map(f.month).toDC

@@ -143,7 +155,7 @@
* of their respective months. Otherwise, the difference is calculated assuming 31 days per month.
*
* For example:
* {{{
* @example {{{
* Date("2017-11-14").monthsBetween(Date("2017-07-14")) // returns 4.0
* Date("2017-01-01").monthsBetween(Date("2017-01-10")) // returns 0.29032258
* Timestamp("2017-06-01 00:00:00").monthsBetween(Timestamp("2017-06-16 12:00:00")) // returns -0.5
@@ -152,6 +164,7 @@
* @param dateCol
* Date or Timestamp column
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.months_between(end:org\.apache\.spark\.sql\.Column,start:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.months_between]]
*/
def monthsBetween(dateCol: DoricColumn[T]): DoubleColumn =
(column.elem, dateCol.elem).mapN(f.months_between).toDC
@@ -165,6 +178,7 @@
* If `roundOff` is set to true, the result is rounded off to 8 digits;
* it is not rounded otherwise.
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.months_between(end:org\.apache\.spark\.sql\.Column,start:org\.apache\.spark\.sql\.Column,roundOff:* org.apache.spark.sql.functions.months_between]]
*/
def monthsBetween(
dateCol: DoricColumn[T],
@@ -180,14 +194,15 @@
* Returns the first date which is later than the value of the `date` column that is on the
* specified day of the week.
*
* For example, `Date("2015-07-27").nextDay("Sunday")` returns Date("2015-08-02") because
* that is the first Sunday after 2015-07-27.
* @example For example, `Date("2015-07-27").nextDay("Sunday")` returns Date("2015-08-02")
* because that is the first Sunday after 2015-07-27.
*
* @param dayOfWeek
* Case insensitive, and accepts: "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"
* @note
* Timestamp columns will be truncated to Date column
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.next_day]]
*/
def nextDay(dayOfWeek: StringColumn): DateColumn =
(column.elem, dayOfWeek.elem)
@@ -200,28 +215,30 @@
* Extracts the quarter as an integer from a given date.
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.quarter]]
*/
def quarter: IntegerColumn = column.elem.map(f.quarter).toDC

/**
* Returns date truncated to the unit specified by the format.
*
* For example, `Timestamp("2018-11-19 12:01:19").trunc("year")` returns Date("2018-01-01")
* @example For example, `Timestamp("2018-11-19 12:01:19").trunc("year")` returns Date("2018-01-01")
*
* @param format
* if date:
* * 'year', 'yyyy', 'yy' to truncate by year,
* * 'month', 'mon', 'mm' to truncate by month
* Other options are: 'week', 'quarter'
* if timestamp:
* * 'year', 'yyyy', 'yy' to truncate by year,
* * 'month', 'mon', 'mm' to truncate by month,
* * 'day', 'dd' to truncate by day,
* Other options are:
* * 'microsecond', 'millisecond', 'second', 'minute', 'hour', 'week', 'quarter'
* - if <b>date</b>:
* - 'year', 'yyyy', 'yy' to truncate by year,
* - 'month', 'mon', 'mm' to truncate by month
* - __Other options are__: 'week', 'quarter'
* - if <b>timestamp</b>:
* - 'year', 'yyyy', 'yy' to truncate by year,
* - 'month', 'mon', 'mm' to truncate by month,
* - 'day', 'dd' to truncate by day,
* - __Other options are__: 'microsecond', 'millisecond', 'second', 'minute', 'hour', 'week', 'quarter'
* @note
* Timestamp columns will be truncated to Date column
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.trunc]]
* @see [[org.apache.spark.sql.functions.date_trunc]]
*/
def truncate(format: StringColumn): DoricColumn[T] =
(column.elem, format.elem)
@@ -243,6 +260,7 @@
* A long
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.unix_timestamp(s:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.unix_timestamp]]
*/
def unixTimestamp: LongColumn = column.elem.map(f.unix_timestamp).toDC
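A hedged one-liner for `unixTimestamp`; `colTimestamp` is an assumed typed getter, and `spark` comes from the first sketch.

```scala
import doric._
import java.sql.Timestamp
import spark.implicits._

val events = Seq(Timestamp.valueOf("2022-01-24 12:00:00")).toDF("ts")

// Seconds since 1970-01-01 00:00:00 UTC, using the session time zone.
events.select(colTimestamp("ts").unixTimestamp).show()
```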

Expand All @@ -253,27 +271,31 @@ private[syntax] trait DateColumns {
* as defined by ISO 8601
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.weekofyear]]
*/
def weekOfYear: IntegerColumn = column.elem.map(f.weekofyear).toDC

/**
* Extracts the year as an integer from a given date.
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.year]]
*/
def year: IntegerColumn = column.elem.map(f.year).toDC

/**
* Transform date to timestamp
*
* @group Date Type
* @see [[org.apache.spark.sql.functions.to_timestamp(s:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.to_timestamp]]
*/
def toTimestamp: TimestampColumn = column.elem.map(f.to_timestamp).toDC

/**
* Transform date to Instant
*
* @group Date Type
* @see [[org.apache.spark.sql.functions.to_timestamp(s:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.to_timestamp]]
*/
def toInstant: InstantColumn = column.elem.map(f.to_timestamp).toDC
}
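Closing the file, a hedged sketch of the two conversions, reusing the `dates` DataFrame from the earlier sketch:

```scala
import doric._

dates.select(
  colDate("d").toTimestamp, // 2022-01-24 00:00:00 as TimestampColumn
  colDate("d").toInstant    // the same moment typed as java.time.Instant
).show()
```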