PySpark Development Environment

1 System Environment

# Fix for: ModuleNotFoundError: No module named '_ctypes'
# libffi-devel must be installed before Python is compiled; without it,
# the _ctypes module is skipped during the build
yum -y install libffi-devel
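
Once Python has been rebuilt (section 2), a one-line check confirms the module is present; a minimal sanity check:

# _ctypes should now import without error
python3 -c "import ctypes; print('ctypes OK')"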

2 Python Environment

# Python >= 3.6 is required
# Download, build, and install from source
wget https://www.python.org/ftp/python/3.9.7/Python-3.9.7.tgz
tar -zxvf Python-3.9.7.tgz
cd Python-3.9.7
./configure
make
make install

# Back up the system python and symlink it to the new build
mv /usr/bin/python /usr/bin/python.bak
ln -s /usr/local/bin/python3 /usr/bin/python

# yum still depends on the system Python 2: edit the shebang on the
# first line of each of these files to point at python2 instead of python
vim /usr/bin/yum
vim /usr/libexec/urlgrabber-ext-down
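
The shebang edit can also be scripted rather than done by hand in vim. A sketch, assuming the stock CentOS 7 shebang line #!/usr/bin/python in both files:

# Rewrite line 1 of each yum helper to invoke python2 explicitly
sed -i '1s|/usr/bin/python$|/usr/bin/python2|' /usr/bin/yum /usr/libexec/urlgrabber-ext-down

# Sanity checks: python resolves to the new build and yum still works
python --version          # expect Python 3.9.7
yum --version | head -1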

3 Spark Environment

# Fix for: "No SPARK_HOME" errors
# Set the SPARK_HOME environment variable to the Spark install directory
export SPARK_HOME=/path/to/spark

# Fix for: ModuleNotFoundError: No module named 'py4j'
# Add the pyspark and py4j zips shipped under $SPARK_HOME/python/lib to PYTHONPATH
export PYTHONPATH=$(ZIPS=("$SPARK_HOME"/python/lib/*.zip); IFS=:; echo "${ZIPS[*]}"):$PYTHONPATH
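
With both variables exported (append them to ~/.bashrc to make them persistent), a short end-to-end run confirms that pyspark and py4j import and a local session starts. A minimal smoke test, assuming a local-mode Spark installation:

python - <<'EOF'
# Start a local Spark session, run one trivial job, and shut down
from pyspark.sql import SparkSession

spark = (SparkSession.builder
         .master("local[*]")
         .appName("env-check")
         .getOrCreate())
print(spark.range(5).count())  # expect 5
spark.stop()
EOF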
