Commit c3f046f

Merge pull request #1 from apache/master: merge latest spark

2 parents: dc0c449 + 3f00bb3

627 files changed, +30631 -11491 lines


.rat-excludes (+2)

@@ -1,4 +1,5 @@
 target
+cache
 .gitignore
 .gitattributes
 .project
@@ -18,6 +19,7 @@ fairscheduler.xml.template
 spark-defaults.conf.template
 log4j.properties
 log4j.properties.template
+metrics.properties
 metrics.properties.template
 slaves
 slaves.template

LICENSE (+16)

@@ -771,6 +771,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.

+========================================================================
+For TestTimSort (core/src/test/java/org/apache/spark/util/collection/TestTimSort.java):
+========================================================================
+Copyright (C) 2015 Stijn de Gouw
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.

 ========================================================================
 For LimitedInputStream

assembly/pom.xml (+10 -111)

@@ -36,10 +36,6 @@
     <spark.jar.dir>scala-${scala.binary.version}</spark.jar.dir>
     <spark.jar.basename>spark-assembly-${project.version}-hadoop${hadoop.version}.jar</spark.jar.basename>
     <spark.jar>${project.build.directory}/${spark.jar.dir}/${spark.jar.basename}</spark.jar>
-    <deb.pkg.name>spark</deb.pkg.name>
-    <deb.install.path>/usr/share/spark</deb.install.path>
-    <deb.user>root</deb.user>
-    <deb.bin.filemode>744</deb.bin.filemode>
   </properties>

   <dependencies>
@@ -118,6 +114,16 @@
                   <exclude>META-INF/*.RSA</exclude>
                 </excludes>
               </filter>
+              <filter>
+                <!-- Exclude libgfortran, libgcc for license issues -->
+                <artifact>org.jblas:jblas</artifact>
+                <excludes>
+                  <!-- Linux amd64 is OK; not statically linked -->
+                  <exclude>lib/static/Linux/i386/**</exclude>
+                  <exclude>lib/static/Mac OS X/**</exclude>
+                  <exclude>lib/static/Windows/**</exclude>
+                </excludes>
+              </filter>
             </filters>
           </configuration>
           <executions>
@@ -217,113 +223,6 @@
       </plugins>
     </build>
   </profile>
-  <profile>
-    <id>deb</id>
-    <build>
-      <plugins>
-        <plugin>
-          <groupId>org.codehaus.mojo</groupId>
-          <artifactId>buildnumber-maven-plugin</artifactId>
-          <version>1.2</version>
-          <executions>
-            <execution>
-              <phase>validate</phase>
-              <goals>
-                <goal>create</goal>
-              </goals>
-              <configuration>
-                <shortRevisionLength>8</shortRevisionLength>
-              </configuration>
-            </execution>
-          </executions>
-        </plugin>
-        <plugin>
-          <groupId>org.vafer</groupId>
-          <artifactId>jdeb</artifactId>
-          <version>0.11</version>
-          <executions>
-            <execution>
-              <phase>package</phase>
-              <goals>
-                <goal>jdeb</goal>
-              </goals>
-              <configuration>
-                <deb>${project.build.directory}/${deb.pkg.name}_${project.version}-${buildNumber}_all.deb</deb>
-                <attach>false</attach>
-                <compression>gzip</compression>
-                <dataSet>
-                  <data>
-                    <src>${spark.jar}</src>
-                    <type>file</type>
-                    <mapper>
-                      <type>perm</type>
-                      <user>${deb.user}</user>
-                      <group>${deb.user}</group>
-                      <prefix>${deb.install.path}/jars</prefix>
-                    </mapper>
-                  </data>
-                  <data>
-                    <src>${basedir}/src/deb/RELEASE</src>
-                    <type>file</type>
-                    <mapper>
-                      <type>perm</type>
-                      <user>${deb.user}</user>
-                      <group>${deb.user}</group>
-                      <prefix>${deb.install.path}</prefix>
-                    </mapper>
-                  </data>
-                  <data>
-                    <src>${basedir}/../conf</src>
-                    <type>directory</type>
-                    <mapper>
-                      <type>perm</type>
-                      <user>${deb.user}</user>
-                      <group>${deb.user}</group>
-                      <prefix>${deb.install.path}/conf</prefix>
-                      <filemode>744</filemode>
-                    </mapper>
-                  </data>
-                  <data>
-                    <src>${basedir}/../bin</src>
-                    <type>directory</type>
-                    <mapper>
-                      <type>perm</type>
-                      <user>${deb.user}</user>
-                      <group>${deb.user}</group>
-                      <prefix>${deb.install.path}/bin</prefix>
-                      <filemode>${deb.bin.filemode}</filemode>
-                    </mapper>
-                  </data>
-                  <data>
-                    <src>${basedir}/../sbin</src>
-                    <type>directory</type>
-                    <mapper>
-                      <type>perm</type>
-                      <user>${deb.user}</user>
-                      <group>${deb.user}</group>
-                      <prefix>${deb.install.path}/sbin</prefix>
-                      <filemode>744</filemode>
-                    </mapper>
-                  </data>
-                  <data>
-                    <src>${basedir}/../python</src>
-                    <type>directory</type>
-                    <mapper>
-                      <type>perm</type>
-                      <user>${deb.user}</user>
-                      <group>${deb.user}</group>
-                      <prefix>${deb.install.path}/python</prefix>
-                      <filemode>744</filemode>
-                    </mapper>
-                  </data>
-                </dataSet>
-              </configuration>
-            </execution>
-          </executions>
-        </plugin>
-      </plugins>
-    </build>
-  </profile>
   <profile>
     <id>kinesis-asl</id>
     <dependencies>

assembly/src/deb/RELEASE (-2)

This file was deleted.

assembly/src/deb/control/control (-8)

This file was deleted.

bin/compute-classpath.sh (+2 -2)

@@ -76,7 +76,7 @@ fi

 num_jars=0

-for f in ${assembly_folder}/spark-assembly*hadoop*.jar; do
+for f in "${assembly_folder}"/spark-assembly*hadoop*.jar; do
   if [[ ! -e "$f" ]]; then
     echo "Failed to find Spark assembly in $assembly_folder" 1>&2
     echo "You need to build Spark before running this program." 1>&2
@@ -88,7 +88,7 @@ done

 if [ "$num_jars" -gt "1" ]; then
   echo "Found multiple Spark assembly jars in $assembly_folder:" 1>&2
-  ls ${assembly_folder}/spark-assembly*hadoop*.jar 1>&2
+  ls "${assembly_folder}"/spark-assembly*hadoop*.jar 1>&2
   echo "Please remove all but one jar." 1>&2
   exit 1
 fi
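
Both hunks make the same fix: quoting the variable prefix protects paths that contain spaces from word splitting, while leaving the glob suffix outside the quotes keeps it expanding. A minimal sketch of the difference, using a hypothetical directory (bin/run-example below gets the identical treatment):

    assembly_folder="/tmp/spark demo"   # hypothetical path containing a space
    mkdir -p "$assembly_folder"
    touch "$assembly_folder/spark-assembly-1.3.0-hadoop2.4.0.jar"

    # Unquoted: the expansion word-splits at the space, so neither word matches
    # and the loop only sees the literal, unmatched pattern.
    for f in ${assembly_folder}/spark-assembly*hadoop*.jar; do echo "unquoted: $f"; done

    # Quoted variable, unquoted glob: one word, and the glob still expands.
    for f in "${assembly_folder}"/spark-assembly*hadoop*.jar; do echo "quoted: $f"; done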

bin/run-example (+2 -2)

@@ -42,7 +42,7 @@ fi

 JAR_COUNT=0

-for f in ${JAR_PATH}/spark-examples-*hadoop*.jar; do
+for f in "${JAR_PATH}"/spark-examples-*hadoop*.jar; do
   if [[ ! -e "$f" ]]; then
     echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2
     echo "You need to build Spark before running this program" 1>&2
@@ -54,7 +54,7 @@ done

 if [ "$JAR_COUNT" -gt "1" ]; then
   echo "Found multiple Spark examples assembly jars in ${JAR_PATH}" 1>&2
-  ls ${JAR_PATH}/spark-examples-*hadoop*.jar 1>&2
+  ls "${JAR_PATH}"/spark-examples-*hadoop*.jar 1>&2
   echo "Please remove all but one jar." 1>&2
   exit 1
 fi

bin/spark-shell.cmd

File mode changed from 100755 to 100644.

bin/spark-submit2.cmd (+1 -1)

@@ -25,7 +25,7 @@ set ORIG_ARGS=%*
 rem Reset the values of all variables used
 set SPARK_SUBMIT_DEPLOY_MODE=client

-if not defined %SPARK_CONF_DIR% (
+if [%SPARK_CONF_DIR%] == [] (
   set SPARK_CONF_DIR=%SPARK_HOME%\conf
 )
 set SPARK_SUBMIT_PROPERTIES_FILE=%SPARK_CONF_DIR%\spark-defaults.conf
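
In a batch file, `if not defined` expects a bare variable name. With the `%...%` expansion, a user-supplied SPARK_CONF_DIR made the old line test whether a variable named by its *value* existed; that test almost always failed, so the user's setting was clobbered with the default. Comparing the expanded value against empty brackets applies the default only when SPARK_CONF_DIR is genuinely unset or empty.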

bin/utils.sh (+2 -1)

@@ -35,7 +35,8 @@ function gatherSparkSubmitOpts() {
     --master | --deploy-mode | --class | --name | --jars | --packages | --py-files | --files | \
     --conf | --repositories | --properties-file | --driver-memory | --driver-java-options | \
     --driver-library-path | --driver-class-path | --executor-memory | --driver-cores | \
-    --total-executor-cores | --executor-cores | --queue | --num-executors | --archives)
+    --total-executor-cores | --executor-cores | --queue | --num-executors | --archives | \
+    --proxy-user)
       if [[ $# -lt 2 ]]; then
         "$SUBMIT_USAGE_FUNCTION"
         exit 1;

bin/windows-utils.cmd (+1)

@@ -33,6 +33,7 @@ SET opts="%opts:~1,-1% \<--conf\> \<--properties-file\> \<--driver-memory\> \<--
 SET opts="%opts:~1,-1% \<--driver-library-path\> \<--driver-class-path\> \<--executor-memory\>"
 SET opts="%opts:~1,-1% \<--driver-cores\> \<--total-executor-cores\> \<--executor-cores\> \<--queue\>"
 SET opts="%opts:~1,-1% \<--num-executors\> \<--archives\> \<--packages\> \<--repositories\>"
+SET opts="%opts:~1,-1% \<--proxy-user\>"

 echo %1 | findstr %opts% >nul
 if %ERRORLEVEL% equ 0 (
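
`--proxy-user` takes a value, so both launchers have to treat it as a two-token option: bin/utils.sh shifts the flag together with its argument into the submission options, and bin/windows-utils.cmd adds it to the findstr pattern that plays the same role on Windows.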

build/mvn (+4 -1)

@@ -21,6 +21,8 @@
 _DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 # Preserve the calling directory
 _CALLING_DIR="$(pwd)"
+# Options used during compilation
+_COMPILE_JVM_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"

 # Installs any application tarball given a URL, the expected tarball name,
 # and, optionally, a checkable binary path to determine if the binary has
@@ -136,14 +138,15 @@ cd "${_CALLING_DIR}"
 # Now that zinc is ensured to be installed, check its status and, if its
 # not running or just installed, start it
 if [ -n "${ZINC_INSTALL_FLAG}" -o -z "`${ZINC_BIN} -status`" ]; then
+  export ZINC_OPTS=${ZINC_OPTS:-"$_COMPILE_JVM_OPTS"}
   ${ZINC_BIN} -shutdown
   ${ZINC_BIN} -start -port ${ZINC_PORT} \
     -scala-compiler "${SCALA_COMPILER}" \
     -scala-library "${SCALA_LIBRARY}" &>/dev/null
 fi

 # Set any `mvn` options if not already present
-export MAVEN_OPTS=${MAVEN_OPTS:-"-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"}
+export MAVEN_OPTS=${MAVEN_OPTS:-"$_COMPILE_JVM_OPTS"}

 # Last, call the `mvn` command as usual
 ${MVN_BIN} "$@"
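
With the shared _COMPILE_JVM_OPTS default, the zinc compile server and Maven now start with consistent JVM sizing, and each can still be tuned independently through its own environment variable. A hypothetical invocation:

    # Give only the zinc compile server a larger heap; Maven keeps the default.
    ZINC_OPTS="-Xmx4g -XX:MaxPermSize=512M" build/mvn -DskipTests package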

build/sbt (+28)

@@ -125,4 +125,32 @@ loadConfigFile() {
 [[ -f "$etc_sbt_opts_file" ]] && set -- $(loadConfigFile "$etc_sbt_opts_file") "$@"
 [[ -f "$sbt_opts_file" ]] && set -- $(loadConfigFile "$sbt_opts_file") "$@"

+exit_status=127
+saved_stty=""
+
+restoreSttySettings() {
+  stty $saved_stty
+  saved_stty=""
+}
+
+onExit() {
+  if [[ "$saved_stty" != "" ]]; then
+    restoreSttySettings
+  fi
+  exit $exit_status
+}
+
+saveSttySettings() {
+  saved_stty=$(stty -g 2>/dev/null)
+  if [[ ! $? ]]; then
+    saved_stty=""
+  fi
+}
+
+saveSttySettings
+trap onExit INT
+
 run "$@"
+
+exit_status=$?
+onExit
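
Interrupting sbt's interactive console with Ctrl-C could previously kill the JVM while the terminal was still in raw mode. The script now snapshots the terminal settings with `stty -g` before launching, traps INT to restore them, and propagates the real exit status (127 if `run` never completed) instead of leaving the shell with an unusable terminal.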

build/sbt-launch-lib.bash (+1 -1)

@@ -81,7 +81,7 @@ execRunner () {
     echo ""
   }

-  exec "$@"
+  "$@"
 }

 addJava () {
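
This one-word change supports the stty handling above: `exec` replaces the wrapper shell with the launched JVM, so build/sbt's INT trap would never fire. Running `"$@"` as an ordinary command keeps the wrapper alive to restore the terminal and forward the exit status.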

conf/metrics.properties.template (+9)

@@ -122,6 +122,15 @@

 #worker.sink.csv.unit=minutes

+# Enable Slf4jSink for all instances by class name
+#*.sink.slf4j.class=org.apache.spark.metrics.sink.Slf4jSink
+
+# Polling period for Slf4JSink
+#*.sink.sl4j.period=1
+
+#*.sink.sl4j.unit=minutes
+
+
 # Enable jvm source for instance master, worker, driver and executor
 #master.source.jvm.class=org.apache.spark.metrics.source.JvmSource

core/pom.xml (+8 -10)

@@ -243,6 +243,14 @@
       <groupId>io.dropwizard.metrics</groupId>
       <artifactId>metrics-graphite</artifactId>
     </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-databind</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.module</groupId>
+      <artifactId>jackson-module-scala_2.10</artifactId>
+    </dependency>
     <dependency>
       <groupId>org.apache.derby</groupId>
       <artifactId>derby</artifactId>
@@ -321,16 +329,6 @@
       <artifactId>scalacheck_${scala.binary.version}</artifactId>
       <scope>test</scope>
     </dependency>
-    <dependency>
-      <groupId>org.easymock</groupId>
-      <artifactId>easymockclassextension</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>asm</groupId>
-      <artifactId>asm</artifactId>
-      <scope>test</scope>
-    </dependency>
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>

core/src/main/java/org/apache/spark/util/collection/TimSort.java (+4 -5)

@@ -425,15 +425,14 @@ private void pushRun(int runBase, int runLen) {
   private void mergeCollapse() {
     while (stackSize > 1) {
       int n = stackSize - 2;
-      if (n > 0 && runLen[n-1] <= runLen[n] + runLen[n+1]) {
+      if ( (n >= 1 && runLen[n-1] <= runLen[n] + runLen[n+1])
+          || (n >= 2 && runLen[n-2] <= runLen[n] + runLen[n-1])) {
         if (runLen[n - 1] < runLen[n + 1])
           n--;
-        mergeAt(n);
-      } else if (runLen[n] <= runLen[n + 1]) {
-        mergeAt(n);
-      } else {
+      } else if (runLen[n] > runLen[n + 1]) {
         break; // Invariant is established
       }
+      mergeAt(n);
     }
   }
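
This is the TimSort stack-invariant fix reported by de Gouw et al. (2015) against OpenJDK's java.util.Arrays.sort, which is also why LICENSE above gains an entry for the new TestTimSort test. mergeCollapse is supposed to keep runLen[i-2] > runLen[i-1] + runLen[i] and runLen[i-1] > runLen[i] for every run on the stack, but the old code rechecked only the topmost runs after a merge, so a violation could survive one level down; on adversarial inputs the run stack could then outgrow its fixed capacity and throw an ArrayIndexOutOfBoundsException. Also testing runLen[n-2] <= runLen[n] + runLen[n-1] re-establishes the invariant for the whole stack before the loop exits.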