Skip to content

Commit

Permalink
Merge pull request #4 from adidas/release/5.3.0
Browse files Browse the repository at this point in the history
Integrate release/5.3.0 changes into master
  • Loading branch information
bemu authored Aug 27, 2020
2 parents d83f9c4 + f4d24d5 commit c22d447
Show file tree
Hide file tree
Showing 431 changed files with 8,495 additions and 4,279 deletions.
23 changes: 23 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
*.class
*.log
*.iml
.idea/*

# sbt specific
.cache/
.history/
.lib/
dist/*
project/project/
project/target/
src/main/java/
src/main/resources/
src/main/scala-2.10/
src/test/java/
src/test/scala-2.10/
target/
metastore_db/

# Scala-IDE specific
.scala_dependencies
.worksheet
189 changes: 189 additions & 0 deletions .scalafmt.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
version = "2.6.4"
maxColumn = 100
docstrings.wrap = yes
docstrings.style = SpaceAsterisk
comments.wrap = trailing
comments.wrapStandaloneSlcAsSlc = false
optIn.configStyleArguments = true
optIn.breaksInsideChains = false
optIn.breakChainOnFirstMethodDot = true
optIn.encloseClassicChains = false
optIn.selfAnnotationNewline = true
optIn.annotationNewlines = true
optIn.forceBlankLineBeforeDocstring = true
optIn.blankLineBeforeDocstring = false
binPack.unsafeCallSite = false
binPack.unsafeDefnSite = false
binPack.parentConstructors = never
binPack.literalArgumentLists = true
binPack.literalsIncludeSimpleExpr = false
binPack.literalsSingleLine = false
binPack.literalsMinArgCount = 5
binPack.literalsInclude = [
".*"
]
binPack.literalsExclude = [
String
"Term.Name"
]
continuationIndent.callSite = 2
continuationIndent.defnSite = 4
continuationIndent.ctorSite = null
continuationIndent.extendSite = 4
continuationIndent.withSiteRelativeToExtends = 0
align.multiline = false
align.stripMargin = true
align.openParenCallSite = false
align.openParenDefnSite = false
align.tokens = [
{
code = "=>"
owner = Case
}
]
align.arrowEnumeratorGenerator = false
align.ifWhileOpenParen = false
align.treeCategory."Defn.Trait" = "class/object/trait"
align.treeCategory."Defn.Object" = "class/object/trait"
align.treeCategory."Defn.Val" = "val/var/def"
align.treeCategory."Defn.Def" = "val/var/def"
align.treeCategory."Defn.Var" = "val/var/def"
align.treeCategory."Enumerator.Generator" = for
align.treeCategory."Enumerator.Val" = for
align.treeCategory."Defn.Class" = "class/object/trait"
spaces.beforeContextBoundColon = Never
spaces.afterTripleEquals = false
spaces.inImportCurlyBraces = false
spaces.inParentheses = false
spaces.neverAroundInfixTypes = []
spaces.afterKeywordBeforeParen = true
spaces.inByNameTypes = true
spaces.afterSymbolicDefs = false
literals.long = Upper
literals.float = Lower
literals.double = Lower
literals.hexDigits = Lower
literals.hexPrefix = Lower
literals.scientific = Lower
lineEndings = unix
rewrite.rules = [RedundantBraces]
rewrite.redundantBraces.methodBodies = true
rewrite.redundantBraces.includeUnitMethods = true
rewrite.redundantBraces.maxLines = 100
rewrite.redundantBraces.stringInterpolation = true
rewrite.redundantBraces.parensForOneLineApply = null
rewrite.redundantBraces.generalExpressions = true
rewrite.sortModifiers.order = [
"`implicit`"
"`final`"
"`sealed`"
"`abstract`"
"`override`"
"`private`"
"`protected`"
"`lazy`"
]
rewrite.neverInfix.includeFilters = [
"""[\w\d_]+"""
]
rewrite.neverInfix.excludeFilters = [
until
to
by
eq
ne
"should.*"
"contain.*"
"must.*"
in
ignore
be
taggedAs
thrownBy
synchronized
have
when
size
only
noneOf
oneElementOf
noElementsOf
atLeastOneElementOf
atMostOneElementOf
allElementsOf
inOrderElementsOf
theSameElementsAs
]
indentOperator.include = ".*"
indentOperator.exclude = """^(&&|\|\|)$"""
newlines.neverInResultType = false
newlines.neverBeforeJsNative = false
newlines.sometimesBeforeColonInMethodReturnType = true
newlines.penalizeSingleSelectMultiArgList = true
newlines.alwaysBeforeCurlyBraceLambdaParams = false
newlines.topLevelStatementsMinBreaks = 1
newlines.topLevelStatements = [before]
newlines.alwaysBeforeTopLevelStatements = false
newlines.implicitParamListModifierForce = []
newlines.implicitParamListModifierPrefer = null
newlines.alwaysBeforeElseAfterCurlyIf = false
newlines.alwaysBeforeMultilineDef = true
newlines.afterInfix = null
newlines.afterInfixBreakOnNested = false
newlines.afterInfixMaxCountPerExprForSome = 10
newlines.afterInfixMaxCountPerFile = 500
newlines.afterCurlyLambda = squash
newlines.avoidForSimpleOverflow = []
newlines.avoidAfterYield = true
runner.debug = false
runner.eventCallback = "<FormatEvent => Unit>"
runner.optimizer.dequeueOnNewStatements = true
runner.optimizer.escapeInPathologicalCases = true
runner.optimizer.maxVisitsPerToken = 10000
runner.optimizer.maxEscapes = 16
runner.optimizer.maxDepth = 100
runner.optimizer.acceptOptimalAtHints = true
runner.optimizer.disableOptimizationsInsideSensitiveAreas = true
runner.optimizer.pruneSlowStates = true
runner.optimizer.recurseOnBlocks = true
runner.optimizer.forceConfigStyleOnOffset = 150
runner.optimizer.forceConfigStyleMinArgCount = 2
runner.maxStateVisits = 1000000
runner.dialect = "scala211"
runner.ignoreWarnings = false
runner.fatalWarnings = false
indentYieldKeyword = true
importSelectors = noBinPack
unindentTopLevelOperators = false
includeCurlyBraceInSelectChains = true
includeNoParensInSelectChains = false
assumeStandardLibraryStripMargin = false
danglingParentheses.callSite = true
danglingParentheses.defnSite = true
danglingParentheses.ctrlSite = true
danglingParentheses.exclude = []
poorMansTrailingCommasInConfigStyle = false
trailingCommas = never
verticalMultiline.atDefnSite = false
verticalMultiline.arityThreshold = 100
verticalMultiline.newlineBeforeImplicitKW = false
verticalMultiline.newlineAfterImplicitKW = false
verticalMultiline.newlineAfterOpenParen = false
verticalMultiline.excludeDanglingParens = [
"`class`"
"`trait`"
]
verticalAlignMultilineOperators = false
onTestFailure = ""
encoding = "UTF-8"
project.git = true
project.files = []
project.includeFilters = [
""".*\.scala$"""
""".*\.sbt$"""
""".*\.sc$"""
]
project.excludeFilters = [
"target"
]
xmlLiterals.assumeFormatted = false
13 changes: 10 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM amazonlinux:2.0.20190508
FROM amazonlinux:2.0.20200722.0

ARG JAVA_VERSION=1.8.0

Expand All @@ -7,7 +7,14 @@ RUN curl https://bintray.com/sbt/rpm/rpm | tee /etc/yum.repos.d/bintray-sbt-rpm.

# Installing system dependencies
RUN yum update -y && \
yum install -y java-${JAVA_VERSION}-openjdk java-${JAVA_VERSION}-openjdk-devel sbt && \
yum clean all
yum install -y java-${JAVA_VERSION}-openjdk java-${JAVA_VERSION}-openjdk-devel sbt shadow-utils && \
yum clean all && \
rm -rf /var/cache/yum

RUN groupadd -r m3d && \
useradd -r -g m3d m3d && \
mkdir -p /home/m3d && \
chown m3d:m3d /home/m3d
USER m3d

CMD ["/bin/bash"]
7 changes: 7 additions & 0 deletions Jenkinsfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pipeline {

options {
ansiColor('xterm')
disableConcurrentBuilds()
Expand Down Expand Up @@ -35,6 +36,12 @@ pipeline {
}
}

stage('lint code') {
steps {
sh "./dev-env.sh project-lint -w ${workspace}"
}
}

stage('run tests') {
steps {
sh "./dev-env.sh project-test -w ${workspace}"
Expand Down
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
M3D Engine
=======

![M3D logo](/static/images/m3d_logo.png)

**M3D** stands for _Metadata Driven Development_ and is a cloud- and platform-agnostic framework for the automated creation, management and governance of metadata and data flows from multiple source systems to multiple target systems. The main features and design goals of M3D are:

* Cloud and platform agnostic
Expand Down Expand Up @@ -46,7 +48,7 @@ M3D Engine supports:

* Loading structured and semi-structured data in Full mode
* Loading structured and semi-structured data in Append mode
* Loading structured and semi-structured data in Delta mode
* Loading structured and semi-structured data in Delta mode (DeltaLoad - in memory, by comparing new data and target table partitions; DeltaLakeLoad - using [Delta Lake IO](https://delta.io) capabilities)
* Decompression of compressed data
* Extraction from parquet file format
* Extraction from delimiter-separated files (CSV, TSV, etc.)
Expand Down Expand Up @@ -102,7 +104,7 @@ The parameter file for the full load algorithm for example has the following con
* `delimiter` delimiter used in the case of `dsv` format
* `has_header` flag defining whether the input files have a header
* `partition_column` column that contains the partitioning information
* `partition_column_format` format of the partitioning column in the case of of time/date columns
* `partition_column_format` format of the partitioning column in the case of time/date columns
* `target_partitions` partitioning columns in the target
* `target_table` target table where the data will be available for querying after loading

Expand Down
53 changes: 29 additions & 24 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -1,27 +1,25 @@
import sbt.ExclusionRule

name := "m3d-engine"

version := "1.0"

scalaVersion := "2.11.12"
semanticdbEnabled := true
semanticdbVersion := scalafixSemanticdb.revision
scalacOptions += "-Ywarn-unused-import"

val sparkVersion = "2.4.0"
val sparkVersion = "2.4.4"
val hadoopVersion = "2.8.5"

conflictManager := sbt.ConflictManager.latestTime
conflictManager := sbt.ConflictManager.latestRevision

mainClass in Compile := Some("com.adidas.analytics.AlgorithmFactory")

// TODO: should be deleted as it exists in the Spark distribution
libraryDependencies += "org.scala-lang" % "scala-library" % scalaVersion.value

libraryDependencies += "joda-time" % "joda-time" % "2.9.3" % Provided
libraryDependencies += "org.joda" % "joda-convert" % "2.1.1"

libraryDependencies += "org.slf4j" % "slf4j-log4j12" % "1.7.16"
/* =====================
* Dependencies
* ===================== */

libraryDependencies += "org.apache.spark" %% "spark-core" % sparkVersion % Provided withExclusions Vector(
libraryDependencies += "org.apache.spark" %% "spark-core" % sparkVersion % Provided withExclusions Vector(
ExclusionRule("org.apache.hadoop", "hadoop-common"),
ExclusionRule("org.apache.hadoop", "hadoop-hdfs"),
ExclusionRule("com.google.guava", "guava")
Expand All @@ -32,30 +30,37 @@ libraryDependencies += "org.apache.spark" %% "spark-hive" % sparkVersion % Provi
)

libraryDependencies += "org.apache.hadoop" % "hadoop-common" % hadoopVersion % Provided withExclusions Vector(
ExclusionRule("io.netty", "netty-all")
ExclusionRule("io.netty", "netty-all")
)
libraryDependencies += "org.apache.hadoop" % "hadoop-hdfs" % hadoopVersion % Provided
libraryDependencies += "org.apache.hadoop" % "hadoop-distcp" % hadoopVersion % Provided

// TODO: replace existing configuration with pureconfig
//libraryDependencies += "com.github.pureconfig" %% "pureconfig" % "0.9.2"
libraryDependencies += "joda-time" % "joda-time" % "2.9.3" % Provided
libraryDependencies += "org.joda" % "joda-convert" % "2.2.1"

libraryDependencies += "org.slf4j" % "slf4j-log4j12" % "1.7.30"

// Dependencies for test
libraryDependencies += "io.delta" %% "delta-core" % "0.6.1"

libraryDependencies += "org.scalatest" %% "scalatest" % "3.0.5" % Test
/* =====================
* Dependencies for test
* ===================== */

libraryDependencies += "org.apache.hadoop" % "hadoop-hdfs" % hadoopVersion % Test classifier "tests" withExclusions Vector(
ExclusionRule("io.netty", "netty-all")
)
libraryDependencies += "org.apache.hadoop" % "hadoop-common" % hadoopVersion % Test classifier "tests" withExclusions Vector(
ExclusionRule("io.netty", "netty-all")
)
libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.1" % Test

libraryDependencies +=
"org.apache.hadoop" % "hadoop-hdfs" % hadoopVersion % Test classifier "tests" withExclusions Vector(
ExclusionRule("io.netty", "netty-all")
)
libraryDependencies +=
"org.apache.hadoop" % "hadoop-common" % hadoopVersion % Test classifier "tests" withExclusions Vector(
ExclusionRule("io.netty", "netty-all")
)

fork in Test := true

// disable parallel execution
parallelExecution in Test := false

// skipping tests when running assembly
test in assembly := {}
// skipping tests when running assembly
test in assembly := {}
4 changes: 2 additions & 2 deletions common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,10 @@ function exec_command_within_container() {

if [[ -z "$LOCAL_IS_INTERACTIVE" ]]; then
echo "Executing command within container: $LOCAL_CMD"
docker exec "$LOCAL_CONTAINER_INSTANCE_NAME" bash -c "cd /root/workspace/${LOCAL_PROJECT_NAME} && ${LOCAL_CMD}"
docker exec "$LOCAL_CONTAINER_INSTANCE_NAME" bash -c "cd /m3d/workspace/${LOCAL_PROJECT_NAME} && ${LOCAL_CMD}"
else
echo "Executing command within container in interactive mode: $LOCAL_CMD"
docker exec -it "$LOCAL_CONTAINER_INSTANCE_NAME" bash -c "cd /root/workspace/${LOCAL_PROJECT_NAME} && ${LOCAL_CMD}"
docker exec -it "$LOCAL_CONTAINER_INSTANCE_NAME" bash -c "cd /m3d/workspace/${LOCAL_PROJECT_NAME} && ${LOCAL_CMD}"
fi
}

Expand Down
Loading

0 comments on commit c22d447

Please sign in to comment.