Shell in Practice

make-distribution.sh
./dev/make-distribution.sh --name "hadoop2-without-hive" --tgz "-Pyarn,hadoop-provided,hadoop-2.7,parquet-provided,orc-provided,-Pflume,-Pkubernetes"
#!/usr/bin/env bash
# The shebang above sets the shell environment to the Bourne Again Shell (bash)
#
# Script to create a binary distribution for easy deploys of Spark.
# The distribution directory defaults to dist/ but can be overridden below.
# The distribution contains fat (assembly) jars that include the Scala library,
# so it is completely self contained.
# It does not contain source or *.class files.

set -o pipefail
# Exit the shell immediately if a command returns a non-zero status
set -e
# Print each command and its arguments before executing it
set -x

# pwd prints the absolute path of the current working directory
# Figure out where the Spark framework is installed
SPARK_HOME="$(cd "`dirname "$0"`/.."; pwd)"
DISTDIR="$SPARK_HOME/dist"

MAKE_TGZ=false
MAKE_PIP=false
MAKE_R=false
NAME=none
MVN="$SPARK_HOME/build/mvn"

function exit_with_usage {
# Turn off command tracing (undoes set -x)
set +x
echo "make-distribution.sh - tool for making binary distributions of Spark"
echo ""
echo "usage:"
cl_options="[--name] [--tgz] [--pip] [--r] [--mvn <mvn-command>]"
echo "make-distribution.sh $cl_options <maven build options>"
echo "See Spark's \"Building Spark\" doc for correct Maven options."
echo ""
# Exit with a non-zero (error) status
exit 1
}

# Parse arguments
# $# is the number of arguments passed to the script or function
while (( "$#" )); do
case $1 in
--tgz)
MAKE_TGZ=true
# break
;;
--pip)
MAKE_PIP=true
;;
--r)
MAKE_R=true
;;
--mvn)
MVN="$2"
# shift moves the positional parameters one place to the left:
# the old $1 is discarded and the index of every remaining parameter decreases by 1
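# A hypothetical illustration: given `set -- --mvn /opt/maven/bin/mvn --tgz`,
# $1 is "--mvn", $2 is "/opt/maven/bin/mvn" and $# is 3; after one `shift`,
# the old $2 becomes the new $1 and $# drops to 2.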
shift
;;
--name)
NAME="$2"
shift
;;
--help)
exit_with_usage
;;
--*)
echo "Error: $1 is not supported"
exit_with_usage
;;
-*)
break
;;
# default
*)
echo "Error: $1 is not supported"
exit_with_usage
;;
esac
shift
done

if [ -z "$JAVA_HOME" ]; then
# Fall back on JAVA_HOME from rpm, if found
# command -v checks whether a command is available
if [ $(command -v rpm) ]; then
# rpm -E (--eval) expands an rpm macro; here it evaluates %java_home
RPM_JAVA_HOME="$(rpm -E %java_home 2>/dev/null)"
if [ "$RPM_JAVA_HOME" != "%java_home" ]; then
JAVA_HOME="$RPM_JAVA_HOME"
echo "No JAVA_HOME set, proceeding with '$JAVA_HOME' learned from rpm"
fi
fi

if [ -z "$JAVA_HOME" ]; then
if [ `command -v java` ]; then
# If java is in /usr/bin/java, we want /usr
# which prints the absolute path of a command
# two nested dirname calls give the grandparent directory of the java binary, e.g. /usr
JAVA_HOME="$(dirname $(dirname $(which java)))"
fi
fi
fi

if [ -z "$JAVA_HOME" ]; then
echo "Error: JAVA_HOME is not set, cannot proceed."
exit -1
fi

if [ $(command -v git) ]; then
# Get the short hash of the latest commit
GITREV=$(git rev-parse --short HEAD 2>/dev/null || :)
if [ ! -z "$GITREV" ]; then
# Build the git revision suffix string
GITREVSTRING=" (git revision $GITREV)"
fi
unset GITREV
fi


if [ ! "$(command -v "$MVN")" ] ; then
echo -e "Could not locate Maven command: '$MVN'."
echo -e "Specify the Maven command with the --mvn flag"
exit -1;
fi

# Get the version information using the mvn help:evaluate plugin
# The bundled build/mvn script is used; it may install mvn, scala and zinc if they are missing
# Inside double quotes, "$@" expands to all arguments as separate words, while "$*" expands to them as a single string
# | pipes the output of the preceding command into the next one
# grep -v prints only the lines that do NOT match the pattern
VERSION=$("$MVN" help:evaluate -Dexpression=project.version $@ \
| grep -v "INFO"\
| grep -v "WARNING"\
# take the last line
| tail -n 1)
SCALA_VERSION=$("$MVN" help:evaluate -Dexpression=scala.binary.version $@ \
| grep -v "INFO"\
| grep -v "WARNING"\
| tail -n 1)
SPARK_HADOOP_VERSION=$("$MVN" help:evaluate -Dexpression=hadoop.version $@ \
| grep -v "INFO"\
| grep -v "WARNING"\
| tail -n 1)
# fgrep matches fixed strings instead of regular expressions, equivalent to grep -F
# echo -n prints without a trailing newline
SPARK_HIVE=$("$MVN" help:evaluate -Dexpression=project.activeProfiles -pl sql/hive $@ \
| grep -v "INFO"\
| grep -v "WARNING"\
| fgrep --count "<id>hive</id>";\
# Reset exit status to 0, otherwise the script stops here if the last grep finds nothing\
# because we use "set -o pipefail"
echo -n)
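# A rough illustration with hypothetical output: `build/mvn help:evaluate
# -Dexpression=project.version` prints many "[INFO] ..." progress lines plus the
# bare value (e.g. "2.4.0") on its own line, so dropping INFO/WARNING lines and
# keeping the last line with `tail -n 1` leaves just the value.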

if [ "$NAME" == "none" ]; then
NAME=$SPARK_HADOOP_VERSION
fi

echo "Spark version is $VERSION"

if [ "$MAKE_TGZ" == "true" ]; then
echo "Making spark-$VERSION-bin-$NAME.tgz"
else
echo "Making distribution for Spark $VERSION in '$DISTDIR'..."
fi

# Build uber fat JAR
cd "$SPARK_HOME"

export MAVEN_OPTS="${MAVEN_OPTS:--Xmx2g -XX:ReservedCodeCacheSize=1g}"

# An array keeps every element of the command as a separate word, even though $MVN may contain spaces
# Store the command as an array because $MVN variable might have spaces in it.
# Normal quoting tricks don't work.
# See: http://mywiki.wooledge.org/BashFAQ/050
BUILD_COMMAND=("$MVN" clean package -DskipTests $@)

# Actually build the jar
echo -e "\nBuilding with..."
echo -e "\$ ${BUILD_COMMAND[@]}\n"

"${BUILD_COMMAND[@]}"

# Make directories
rm -rf "$DISTDIR"
mkdir -p "$DISTDIR/jars"
# > truncates the file before writing; >> appends
echo "Spark $VERSION$GITREVSTRING built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE"
echo "Build flags: $@" >> "$DISTDIR/RELEASE"

# Copy jars
cp "$SPARK_HOME"/assembly/target/scala*/jars/* "$DISTDIR/jars/"

# Only create the yarn directory if the yarn artifacts were built.
if [ -f "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar ]; then
mkdir "$DISTDIR/yarn"
cp "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/yarn"
fi

# Only create and copy the dockerfiles directory if the kubernetes artifacts were built.
if [ -d "$SPARK_HOME"/resource-managers/kubernetes/core/target/ ]; then
mkdir -p "$DISTDIR/kubernetes/"
# cp -a preserves links, file attributes, etc. while copying; equivalent to -dpR
cp -a "$SPARK_HOME"/resource-managers/kubernetes/docker/src/main/dockerfiles "$DISTDIR/kubernetes/"
cp -a "$SPARK_HOME"/resource-managers/kubernetes/integration-tests/tests "$DISTDIR/kubernetes/"
fi

# Copy examples and dependencies
mkdir -p "$DISTDIR/examples/jars"
cp "$SPARK_HOME"/examples/target/scala*/jars/* "$DISTDIR/examples/jars"

# Deduplicate jars that have already been packaged as part of the main Spark dependencies.
for f in "$DISTDIR"/examples/jars/*; do
# basename strips the directory part, keeping only the file name with its extension (like ${var##*/})
# dirname strips the file name, keeping only the directory part (like ${var%/*})
name=$(basename "$f")
if [ -f "$DISTDIR/jars/$name" ]; then
rm "$DISTDIR/examples/jars/$name"
fi
done

# Copy example sources (needed for python and SQL)
mkdir -p "$DISTDIR/examples/src/main"
# cp -r copies directories recursively, including sub-directories and files
cp -r "$SPARK_HOME/examples/src/main" "$DISTDIR/examples/src/"

# Copy license and ASF files
# -e: the file exists; -f: the file exists and is a regular file
if [ -e "$SPARK_HOME/LICENSE-binary" ]; then
cp "$SPARK_HOME/LICENSE-binary" "$DISTDIR/LICENSE"
cp -r "$SPARK_HOME/licenses-binary" "$DISTDIR/licenses"
cp "$SPARK_HOME/NOTICE-binary" "$DISTDIR/NOTICE"
else
echo "Skipping copying LICENSE files"
fi

if [ -e "$SPARK_HOME/CHANGES.txt" ]; then
cp "$SPARK_HOME/CHANGES.txt" "$DISTDIR"
fi

# Copy data files
cp -r "$SPARK_HOME/data" "$DISTDIR"

# Make pip package
if [ "$MAKE_PIP" == "true" ]; then
echo "Building python distribution package"
# pushd changes to the directory and pushes it onto the directory stack; by default it also runs dirs, printing the stack
# the top of the directory stack is always the current directory, and vice versa
pushd "$SPARK_HOME/python" > /dev/null
# Delete the egg info file if it exists, this can cache older setup files.
rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion"
python3 setup.py sdist
# popd pops the top of the directory stack, changes to the new top, and also runs dirs by default
popd > /dev/null
else
echo "Skipping building python distribution package"
fi

# Make R package - this is used for both CRAN release and packing R layout into distribution
if [ "$MAKE_R" == "true" ]; then
echo "Building R source package"
# grep Version picks the line containing "Version" from the DESCRIPTION file
# awk is a text-processing tool; awk '{print $NF}' prints the last field
# $NF is the last field of the current line
R_PACKAGE_VERSION=`grep Version "$SPARK_HOME/R/pkg/DESCRIPTION" | awk '{print $NF}'`
pushd "$SPARK_HOME/R" > /dev/null
# Build source package and run full checks
# Do not source the check-cran.sh - it should be run from where it is for it to set SPARK_HOME
NO_TESTS=1 "$SPARK_HOME/R/check-cran.sh"

# Move R source package to match the Spark release version if the versions are not the same.
# NOTE(shivaram): `mv` throws an error on Linux if source and destination are same file
if [ "$R_PACKAGE_VERSION" != "$VERSION" ]; then
mv "$SPARK_HOME/R/SparkR_$R_PACKAGE_VERSION.tar.gz" "$SPARK_HOME/R/SparkR_$VERSION.tar.gz"
fi

# Install source package to get it to generate vignettes rds files, etc.
VERSION=$VERSION "$SPARK_HOME/R/install-source-package.sh"
popd > /dev/null
else
echo "Skipping building R source package"
fi

# Copy other things
mkdir "$DISTDIR/conf"
cp "$SPARK_HOME"/conf/*.template "$DISTDIR/conf"
cp "$SPARK_HOME/README.md" "$DISTDIR"
cp -r "$SPARK_HOME/bin" "$DISTDIR"
cp -r "$SPARK_HOME/python" "$DISTDIR"

# Remove the python distribution from dist/ if we built it
if [ "$MAKE_PIP" == "true" ]; then
rm -f "$DISTDIR"/python/dist/pyspark-*.tar.gz
fi

cp -r "$SPARK_HOME/sbin" "$DISTDIR"
# Copy SparkR if it exists
if [ -d "$SPARK_HOME/R/lib/SparkR" ]; then
mkdir -p "$DISTDIR/R/lib"
cp -r "$SPARK_HOME/R/lib/SparkR" "$DISTDIR/R/lib"
cp "$SPARK_HOME/R/lib/sparkr.zip" "$DISTDIR/R/lib"
fi

if [ "$MAKE_TGZ" == "true" ]; then
TARDIR_NAME=spark-$VERSION-bin-$NAME
TARDIR="$SPARK_HOME/$TARDIR_NAME"
rm -rf "$TARDIR"
cp -r "$DISTDIR" "$TARDIR"
tar czf "spark-$VERSION-bin-$NAME.tgz" -C "$SPARK_HOME" "$TARDIR_NAME"
rm -rf "$TARDIR"
fi
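
The comments in the script above mention two idioms that are easy to test in isolation: the difference between "$@" and "$*", and the ${VAR:-default} expansion used for MAVEN_OPTS. A minimal standalone sketch (not part of make-distribution.sh; the function name and values are made up):

#!/usr/bin/env bash
# demo_args prints its arguments two ways
demo_args() {
  # "$@" keeps each argument as a separate word
  for a in "$@"; do echo "word: $a"; done
  # "$*" joins all arguments into one string
  echo "joined: $*"
}
demo_args "spark core" sql      # word: spark core / word: sql / joined: spark core sql

# ${VAR:-default} uses the default only when VAR is unset or empty
unset MAVEN_OPTS
echo "${MAVEN_OPTS:--Xmx2g -XX:ReservedCodeCacheSize=1g}"   # prints the default
MAVEN_OPTS="-Xmx4g"
echo "${MAVEN_OPTS:--Xmx2g}"                                # prints -Xmx4g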

build/mvn

# Determine the current working directory
_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Preserve the calling directory
_CALLING_DIR="$(pwd)"
# Options used during compilation
_COMPILE_JVM_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g"

# Installs any application tarball given a URL, the expected tarball name,
# and, optionally, a checkable binary path to determine if the binary has
# already been installed
## Arg1 - URL
## Arg2 - Tarball Name
## Arg3 - Checkable Binary
install_app() {
## declare function-local variables
local remote_tarball="$1/$2"
local local_tarball="${_DIR}/$2"
local binary="${_DIR}/$3"

## curl -L follows HTTP redirects
local curl_opts="--silent --show-error -L"
## wget --no-verbose turns off verbose output without being completely silent
local wget_opts="--no-verbose"
## -z: true if the string has zero length
## -o: logical OR inside [ ]
## !: logical NOT
if [ -z "$3" -o ! -f "$binary" ]; then
# check if we already have the tarball
# check if we have curl installed
# download application
[ ! -f "${local_tarball}" ] && [ $(command -v curl) ] && \
echo "exec: curl ${curl_opts} ${remote_tarball}" 1>&2 && \
curl ${curl_opts} "${remote_tarball}" > "${local_tarball}"
# if the file still doesn't exist, lets try `wget` and cross our fingers
[ ! -f "${local_tarball}" ] && [ $(command -v wget) ] && \
echo "exec: wget ${wget_opts} ${remote_tarball}" 1>&2 && \
wget ${wget_opts} -O "${local_tarball}" "${remote_tarball}"
# if both were unsuccessful, exit
[ ! -f "${local_tarball}" ] && \
echo -n "ERROR: Cannot download $2 with cURL or wget; " && \
echo "please install manually and try again." && \
exit 2
cd "${_DIR}" && tar -xzf "$2"
rm -rf "$local_tarball"
fi
}

# See simple version normalization: http://stackoverflow.com/questions/16989598/bash-comparing-version-numbers
function version { echo "$@" | awk -F. '{ printf("%03d%03d%03d\n", $1,$2,$3); }'; }
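# For example, `version 3.6.3` prints "003006003" and `version 3.5.4` prints
# "003005004", so the zero-padded results compare correctly with -lt below.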

# Determine the Maven version from the root pom.xml file and
# install maven under the build/ folder if needed.
install_mvn() {
local MVN_VERSION=`grep "<maven.version>" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'`
MVN_BIN="$(command -v mvn)"
if [ "$MVN_BIN" ]; then
local MVN_DETECTED_VERSION="$(mvn --version | head -n1 | awk '{print $3}')"
fi
if [ $(version $MVN_DETECTED_VERSION) -lt $(version $MVN_VERSION) ]; then
local APACHE_MIRROR=${APACHE_MIRROR:-'https://www.apache.org/dyn/closer.lua?action=download&filename='}

if [ $(command -v curl) ]; then
local TEST_MIRROR_URL="${APACHE_MIRROR}/maven/maven-3/${MVN_VERSION}/binaries/apache-maven-${MVN_VERSION}-bin.tar.gz"
if ! curl -L --output /dev/null --silent --head --fail "$TEST_MIRROR_URL" ; then
# Fall back to archive.apache.org for older Maven
echo "Falling back to archive.apache.org to download Maven"
APACHE_MIRROR="https://archive.apache.org/dist"
fi
fi

install_app \
"${APACHE_MIRROR}/maven/maven-3/${MVN_VERSION}/binaries" \
"apache-maven-${MVN_VERSION}-bin.tar.gz" \
"apache-maven-${MVN_VERSION}/bin/mvn"

MVN_BIN="${_DIR}/apache-maven-${MVN_VERSION}/bin/mvn"
fi
}

# Install zinc under the build/ folder
install_zinc() {
local ZINC_VERSION=0.3.15
ZINC_BIN="$(command -v zinc)"
if [ "$ZINC_BIN" ]; then
local ZINC_DETECTED_VERSION="$(zinc -version | head -n1 | awk '{print $5}')"
fi

if [ $(version $ZINC_DETECTED_VERSION) -lt $(version $ZINC_VERSION) ]; then
local zinc_path="zinc-${ZINC_VERSION}/bin/zinc"
[ ! -f "${_DIR}/${zinc_path}" ] && ZINC_INSTALL_FLAG=1
local TYPESAFE_MIRROR=${TYPESAFE_MIRROR:-https://downloads.lightbend.com}

install_app \
"${TYPESAFE_MIRROR}/zinc/${ZINC_VERSION}" \
"zinc-${ZINC_VERSION}.tgz" \
"${zinc_path}"
ZINC_BIN="${_DIR}/${zinc_path}"
fi
}

# Determine the Scala version from the root pom.xml file, set the Scala URL,
# and, with that, download the specific version of Scala necessary under
# the build/ folder
install_scala() {
# determine the Scala version used in Spark
local scala_binary_version=`grep "scala.binary.version" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'`
local scala_version=`grep "scala.version" "${_DIR}/../pom.xml" | grep ${scala_binary_version} | head -n1 | awk -F '[<>]' '{print $3}'`
local scala_bin="${_DIR}/scala-${scala_version}/bin/scala"
local TYPESAFE_MIRROR=${TYPESAFE_MIRROR:-https://downloads.lightbend.com}

install_app \
"${TYPESAFE_MIRROR}/scala/${scala_version}" \
"scala-${scala_version}.tgz" \
"scala-${scala_version}/bin/scala"

SCALA_COMPILER="$(cd "$(dirname "${scala_bin}")/../lib" && pwd)/scala-compiler.jar"
SCALA_LIBRARY="$(cd "$(dirname "${scala_bin}")/../lib" && pwd)/scala-library.jar"
}

# Setup healthy defaults for the Zinc port if none were provided from
# the environment
ZINC_PORT=${ZINC_PORT:-"3030"}

# Install the proper version of Scala, Zinc and Maven for the build
install_zinc
install_scala
install_mvn

# Reset the current working directory
cd "${_CALLING_DIR}"

# Now that zinc is ensured to be installed, check its status and, if its
# not running or just installed, start it
if [ -n "${ZINC_INSTALL_FLAG}" -o -z "`"${ZINC_BIN}" -status -port ${ZINC_PORT}`" ]; then
export ZINC_OPTS=${ZINC_OPTS:-"$_COMPILE_JVM_OPTS"}
"${ZINC_BIN}" -shutdown -port ${ZINC_PORT}
"${ZINC_BIN}" -start -port ${ZINC_PORT} \
-server 127.0.0.1 -idle-timeout 3h \
-scala-compiler "${SCALA_COMPILER}" \
-scala-library "${SCALA_LIBRARY}" &>/dev/null
fi

# Set any `mvn` options if not already present
export MAVEN_OPTS=${MAVEN_OPTS:-"$_COMPILE_JVM_OPTS"}

echo "Using \`mvn\` from path: $MVN_BIN" 1>&2

# call the `mvn` command as usual
# SPARK-25854
"${MVN_BIN}" -DzincPort=${ZINC_PORT} "$@"
MVN_RETCODE=$?

# Try to shut down zinc explicitly if the server is still running.
"${ZINC_BIN}" -shutdown -port ${ZINC_PORT}

exit $MVN_RETCODE

Shell variables come in two kinds: global and local.

A global variable is, by default, visible throughout the entire script unless it is explicitly unset.

A local variable is scoped to the function it is declared in, and it takes precedence over a global variable of the same name.
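
A minimal sketch of that scoping behaviour (the names are made up for illustration):

#!/usr/bin/env bash
name="global"           # global: visible everywhere in the script
show_name() {
  local name="local"    # local: visible only inside this function, shadows the global
  echo "inside:  $name" # prints "inside:  local"
}
show_name
echo "outside: $name"   # prints "outside: global" (the global is untouched)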

Both curl and wget can fetch web content. curl can customize request parameters and is better at emulating browser behaviour, while wget supports FTP and recursive retrieval and is better suited to downloading files.
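
For example, both of the following download the same file (the URL is only a placeholder); the install_app function in build/mvn above uses exactly this pair as its primary and fallback downloaders:

# curl: follow redirects (-L), stay quiet but still report errors
curl --silent --show-error -L "https://example.org/files/app.tgz" -o app.tgz
# wget: terse output (--no-verbose), explicit output file (-O)
wget --no-verbose -O app.tgz "https://example.org/files/app.tgz"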

stop-yarn.sh

#!/usr/bin/env bash

function hadoop_usage
{
hadoop_generate_usage "${MYNAME}" false
}

# BASH_SOURCE holds the path of the script currently being executed
MYNAME="${BASH_SOURCE-$0}"

# cd -P resolves symlinks and changes to the physical directory; cd -L (the default) keeps the symlinked path
# pwd -P likewise prints the physical path
bin=$(cd -P -- "$(dirname -- "${MYNAME}")" >/dev/null && pwd -P)

# let's locate libexec...
if [[ -n "${HADOOP_HOME}" ]]; then
HADOOP_DEFAULT_LIBEXEC_DIR="${HADOOP_HOME}/libexec"
else
HADOOP_DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi

# :- substitutes the value on the right as a default when the variable on the left is unset or empty
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$HADOOP_DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
exit 1
fi

# stop nodemanager
echo "Stopping nodemanagers"
hadoop_uservar_su yarn nodemanager "${HADOOP_YARN_HOME}/bin/yarn" \
--config "${HADOOP_CONF_DIR}" \
--workers \
--daemon stop \
nodemanager

# stop resourceManager
HARM=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey yarn.resourcemanager.ha.enabled 2>&-)
if [[ ${HARM} = "false" ]]; then
echo "Stopping resourcemanager"
hadoop_uservar_su yarn resourcemanager "${HADOOP_YARN_HOME}/bin/yarn" \
--config "${HADOOP_CONF_DIR}" \
--daemon stop \
resourcemanager
else
logicals=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey yarn.resourcemanager.ha.rm-ids 2>&-)
logicals=${logicals//,/ }
for id in ${logicals}
do
rmhost=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey "yarn.resourcemanager.hostname.${id}" 2>&-)
RMHOSTS="${RMHOSTS} ${rmhost}"
done
echo "Stopping resourcemanagers on [${RMHOSTS}]"
hadoop_uservar_su yarn resourcemanager "${HADOOP_YARN_HOME}/bin/yarn" \
--config "${HADOOP_CONF_DIR}" \
--daemon stop \
--workers \
--hostnames "${RMHOSTS}" \
resourcemanager
fi

# stop proxyserver
# cut extracts part of each line; -f selects the field and -d sets the delimiter
PROXYSERVER=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey yarn.web-proxy.address 2>&- | cut -f1 -d:)
if [[ -n ${PROXYSERVER} ]]; then
echo "Stopping proxy server [${PROXYSERVER}]"
hadoop_uservar_su yarn proxyserver "${HADOOP_YARN_HOME}/bin/yarn" \
--config "${HADOOP_CONF_DIR}" \
--workers \
--hostnames "${PROXYSERVER}" \
--daemon stop \
proxyserver
fi
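
Two small idioms from this script, shown in isolation with made-up values:

# ${var//old/new} replaces every occurrence of "old" in var
logicals="rm1,rm2,rm3"
echo "${logicals//,/ }"                          # prints: rm1 rm2 rm3

# cut -d: -f1 keeps only the part before the first ":" (host without port)
echo "proxy.example.com:9046" | cut -d: -f1      # prints: proxy.example.com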

start-master.sh

# Starts the master on the machine this script is executed on.

if [ -z "${SPARK_HOME}" ]; then
export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
fi

# NOTE: This exact class name is matched downstream by SparkSubmit.
# Any changes need to be reflected there.
CLASS="org.apache.spark.deploy.master.Master"

if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
echo "Usage: ./sbin/start-master.sh [options]"
pattern="Usage:"
pattern+="\|Using Spark's default log4j profile:"
pattern+="\|Registered signal handlers for"

"${SPARK_HOME}"/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
exit 1
fi

ORIGINAL_ARGS="$@"

. "${SPARK_HOME}/sbin/spark-config.sh"

. "${SPARK_HOME}/bin/load-spark-env.sh"

if [ "$SPARK_MASTER_PORT" = "" ]; then
SPARK_MASTER_PORT=7077
fi

if [ "$SPARK_MASTER_HOST" = "" ]; then
case `uname` in
(SunOS)
SPARK_MASTER_HOST="`/usr/sbin/check-hostname | awk '{print $NF}'`"
;;
(*)
# hostname -f prints the host's fully qualified domain name
SPARK_MASTER_HOST="`hostname -f`"
;;
esac
fi

if [ "$SPARK_MASTER_WEBUI_PORT" = "" ]; then
SPARK_MASTER_WEBUI_PORT=8080
fi

"${SPARK_HOME}/sbin"/spark-daemon.sh start $CLASS 1 \
--host $SPARK_MASTER_HOST --port $SPARK_MASTER_PORT --webui-port $SPARK_MASTER_WEBUI_PORT \
$ORIGINAL_ARGS
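
The --help check above works because [[ "$@" = *--help ]] performs glob matching rather than a plain string comparison. A small sketch with made-up arguments:

args="--port 7077 --help"
if [[ "$args" = *--help ]] || [[ "$args" = *-h ]]; then
  # matches because the string ends with --help; note the unquoted pattern
  # only matches when --help (or -h) is the trailing part of the string
  echo "help requested"
fi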

spark-daemon.sh

#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Runs a Spark command as a daemon.
#
# Environment Variables
#
# SPARK_CONF_DIR Alternate conf dir. Default is ${SPARK_HOME}/conf.
# SPARK_LOG_DIR Where log files are stored. ${SPARK_HOME}/logs by default.
# SPARK_MASTER host:path where spark code should be rsync'd from
# SPARK_PID_DIR The pid files are stored. /tmp by default.
# SPARK_IDENT_STRING A string representing this instance of spark. $USER by default
# SPARK_NICENESS The scheduling priority for daemons. Defaults to 0.
# SPARK_NO_DAEMONIZE If set, will run the proposed command in the foreground. It will not output a PID file.
##

usage="Usage: spark-daemon.sh [--config <conf-dir>] (start|stop|submit|status) <spark-command> <spark-instance-number> <args...>"

# if no args specified, show usage
if [ $# -le 1 ]; then
echo $usage
exit 1
fi

if [ -z "${SPARK_HOME}" ]; then
export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
fi

. "${SPARK_HOME}/sbin/spark-config.sh"

# get arguments

# Check if --config is passed as an argument. It is an optional parameter.
# Exit if the argument is not a directory.

if [ "$1" == "--config" ]
then
shift
conf_dir="$1"
if [ ! -d "$conf_dir" ]
then
echo "ERROR : $conf_dir is not a directory"
echo $usage
exit 1
else
export SPARK_CONF_DIR="$conf_dir"
fi
shift
fi

option=$1
shift
command=$1
shift
instance=$1
shift

spark_rotate_log ()
{
log=$1;
num=5;
if [ -n "$2" ]; then
num=$2
fi
if [ -f "$log" ]; then # rotate logs
while [ $num -gt 1 ]; do
prev=`expr $num - 1`
[ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
num=$prev
done
mv "$log" "$log.$num";
fi
}
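# A rough illustration with hypothetical file names: if master.out already exists,
# master.out.4 is renamed to .5, .3 to .4, ..., .1 to .2, and finally master.out
# becomes master.out.1, so at most five rotated logs are kept.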

. "${SPARK_HOME}/bin/load-spark-env.sh"

if [ "$SPARK_IDENT_STRING" = "" ]; then
export SPARK_IDENT_STRING="$USER"
fi


export SPARK_PRINT_LAUNCH_COMMAND="1"

# get log directory
if [ "$SPARK_LOG_DIR" = "" ]; then
export SPARK_LOG_DIR="${SPARK_HOME}/logs"
fi
mkdir -p "$SPARK_LOG_DIR"
touch "$SPARK_LOG_DIR"/.spark_test > /dev/null 2>&1
TEST_LOG_DIR=$?
if [ "${TEST_LOG_DIR}" = "0" ]; then
rm -f "$SPARK_LOG_DIR"/.spark_test
else
chown "$SPARK_IDENT_STRING" "$SPARK_LOG_DIR"
fi

if [ "$SPARK_PID_DIR" = "" ]; then
SPARK_PID_DIR=/tmp
fi

# some variables
log="$SPARK_LOG_DIR/spark-$SPARK_IDENT_STRING-$command-$instance-$HOSTNAME.out"
pid="$SPARK_PID_DIR/spark-$SPARK_IDENT_STRING-$command-$instance.pid"

# Set default scheduling priority
if [ "$SPARK_NICENESS" = "" ]; then
export SPARK_NICENESS=0
fi

execute_command() {
if [ -z ${SPARK_NO_DAEMONIZE+set} ]; then
nohup -- "$@" >> $log 2>&1 < /dev/null &
newpid="$!"

echo "$newpid" > "$pid"

# Poll for up to 5 seconds for the java process to start
# {1..10} expands to the integers 1 through 10
for i in {1..10}
do
if [[ $(ps -p "$newpid" -o comm=) =~ "java" ]]; then
break
fi
sleep 0.5
done

sleep 2
# Check if the process has died; in that case we'll tail the log so the user can see
if [[ ! $(ps -p "$newpid" -o comm=) =~ "java" ]]; then
echo "failed to launch: $@"
tail -10 "$log" | sed 's/^/ /'
echo "full log in $log"
fi
else
"$@"
fi
}

run_command() {
mode="$1"
shift

mkdir -p "$SPARK_PID_DIR"

if [ -f "$pid" ]; then
TARGET_ID="$(cat "$pid")"
if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
echo "$command running as process $TARGET_ID. Stop it first."
exit 1
fi
fi

if [ "$SPARK_MASTER" != "" ]; then
echo rsync from "$SPARK_MASTER"
rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' "$SPARK_MASTER/" "${SPARK_HOME}"
fi

spark_rotate_log "$log"
echo "starting $command, logging to $log"

case "$mode" in
(class)
execute_command nice -n "$SPARK_NICENESS" "${SPARK_HOME}"/bin/spark-class "$command" "$@"
;;

(submit)
execute_command nice -n "$SPARK_NICENESS" bash "${SPARK_HOME}"/bin/spark-submit --class "$command" "$@"
;;

(*)
echo "unknown mode: $mode"
exit 1
;;
esac

}

case $option in

(submit)
run_command submit "$@"
;;

(start)
run_command class "$@"
;;

(stop)

if [ -f $pid ]; then
TARGET_ID="$(cat "$pid")"
# ps -p selects the process with the given pid; -o comm= prints only the command-name column
# =~ matches the left-hand side against the pattern on the right
if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
echo "stopping $command"
kill "$TARGET_ID" && rm -f "$pid"
else
echo "no $command to stop"
fi
else
echo "no $command to stop"
fi
;;

(status)

if [ -f $pid ]; then
TARGET_ID="$(cat "$pid")"
if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
echo $command is running.
exit 0
else
echo $pid file is present but $command not running
exit 1
fi
else
echo $command not running.
exit 2
fi
;;

(*)
echo $usage
exit 1
;;

esac
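
The start, stop and status branches all rely on the same liveness check: read the pid file and confirm that the process it names is still a java process. A standalone sketch of that idiom (the pid file path is a placeholder):

pid_file="/tmp/spark-demo.pid"
if [ -f "$pid_file" ]; then
  target_id="$(cat "$pid_file")"
  # ps -p limits output to that pid; -o comm= prints only the command-name column
  # with an empty header; =~ then checks whether it contains "java"
  if [[ $(ps -p "$target_id" -o comm=) =~ "java" ]]; then
    echo "process $target_id is running"
  else
    echo "pid file is stale: no java process with pid $target_id"
  fi
fi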

References

Shell Tutorial

Linux Command Reference

Usage of shift in shell programming

The shell command builtin

git-rev-parse

The shell local command

curl command explained

Differences between curl and wget and how to use them

wget command explained

Shell: BASH_SOURCE

cd

ps -o comm= -p $$ to see which shell you are using