Wednesday, October 10, 2018

Use Jupyter Notebook with Spark 2 (Apache Spark) on macOS

Reference url: http://www.dbaglobe.com/2018/07/use-jupyter-notebook-with-spark2-on.html

Below are the changes specific to Apache Spark and Anaconda3 on macOS.

mkdir /Users/donghua/anaconda3/share/jupyter/kernels/pyspark2/

[root@cdh-vm bin]# cat  /Users/donghua/anaconda3/share/jupyter/kernels/pyspark2/kernel.json
    {
      "argv": [
        "python3.6",
        "-m",
        "ipykernel_launcher",
        "-f",
        "{connection_file}"
      ],
      "display_name": "Python3.6 + Pyspark(Spark 2.3.2)",
      "language": "python",
      "env": {
        "PYSPARK_PYTHON": "python",
        "SPARK_HOME": "/Users/donghua/spark-2.3.2-bin-hadoop2.7",
        "SPARK_CONF_DIR": "/Users/donghua/spark-2.3.2-bin-hadoop2.7/conf",
        "PYTHONPATH": "/Users/donghua/spark-2.3.2-bin-hadoop2.7/python/lib/py4j-0.10.7-src.zip:/Users/donghua/spark-2.3.2-bin-hadoop2.7/python/:",
        "PYTHONSTARTUP": "/Users/donghua/spark-2.3.2-bin-hadoop2.7/python/pyspark/shell.py",
        "PYSPARK_SUBMIT_ARGS": "--master spark://Donghuas-MacBook-Air.local:7077 --name PySparkShell pyspark-shell"
      }
   }

/Users/donghua/anaconda3/bin/jupyter-notebook --ip=Donghuas-MacBook-Air.local --port 9999

#export PYSPARK_DRIVER_PYTHON=ipython
#export SPARK_HOME=/Users/donghua/spark-2.3.2-bin-hadoop2.7
#export PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH