From bf1d2e22f5a84e1b35872011b1cafd95ce69cd3b Mon Sep 17 00:00:00 2001
From: Nicholas Pease
Date: Wed, 17 Dec 2025 20:13:39 -0500
Subject: [PATCH] Init

---
 .devcontainer/Dockerfile         | 15 +++++++++++++++
 .devcontainer/devcontainer.json  | 32 ++++++++++++++++++++++++++++++++
 .devcontainer/docker-compose.yml | 20 ++++++++++++++++++++
 pt2-in.txt                       |  3 +++
 requirements.txt                 |  2 ++
 task2.py                         | 22 ++++++++++++++++++++++
 6 files changed, 94 insertions(+)
 create mode 100644 .devcontainer/Dockerfile
 create mode 100644 .devcontainer/devcontainer.json
 create mode 100644 .devcontainer/docker-compose.yml
 create mode 100644 pt2-in.txt
 create mode 100644 requirements.txt
 create mode 100644 task2.py

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
new file mode 100644
index 0000000..ded4ad1
--- /dev/null
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,15 @@
+# [Choice] Python version: 3, 3.9, 3.8, 3.7, 3.6
+ARG PYTHON_VARIANT=3.9
+FROM python:${PYTHON_VARIANT}
+
+# Install python dependencies
+COPY requirements.txt /tmp/pip-tmp/
+RUN pip3 --disable-pip-version-check --no-cache-dir \
+    install -r /tmp/pip-tmp/requirements.txt && rm -rf /tmp/pip-tmp
+
+# Install OpenJDK (major version is selected by the OPENJDK_VARIANT build arg)
+# NOTE(review): pyspark 3.1.2 documents Java 8/11 support - confirm JDK 21 works.
+ARG OPENJDK_VARIANT=21
+RUN apt-get update && \
+    apt-get install -y openjdk-${OPENJDK_VARIANT}-jdk && \
+    apt-get clean
\ No newline at end of file
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 0000000..33df37f
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,32 @@
+{
+    "name": "Python 3 & Apache Spark",
+    "dockerComposeFile": "docker-compose.yml",
+    "service": "app",
+    "workspaceFolder": "/workspace",
+    "settings": {
+        "terminal.integrated.profiles.linux": {
+            "bash": {
+                "path": "/bin/bash",
+                "icon": "terminal-bash"
+            }
+        },
+        "python.pythonPath": "/usr/local/bin/python",
+        "python.languageServer": "Pylance",
+        "python.linting.enabled": true,
+        "python.linting.pylintEnabled": true,
+        "python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8",
+        "python.formatting.blackPath": "/usr/local/py-utils/bin/black",
+        "python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf",
+        "python.linting.banditPath": "/usr/local/py-utils/bin/bandit",
+        "python.linting.flake8Path": "/usr/local/py-utils/bin/flake8",
+        "python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
+        "python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
+        "python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",
+        "python.linting.pylintPath": "/usr/local/py-utils/bin/pylint",
+        "python.testing.pytestPath": "/usr/local/py-utils/bin/pytest"
+    },
+    "extensions": [
+        "ms-python.python",
+        "ms-python.vscode-pylance"
+    ]
+}
\ No newline at end of file
diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml
new file mode 100644
index 0000000..18e5815
--- /dev/null
+++ b/.devcontainer/docker-compose.yml
@@ -0,0 +1,20 @@
+version: '3'
+
+services:
+  app:
+    build:
+      context: ..
+      dockerfile: .devcontainer/Dockerfile
+      args:
+        PYTHON_VARIANT: 3.9
+        OPENJDK_VARIANT: 21
+    volumes:
+      - ..:/workspace:cached
+    command: sleep infinity
+
+  pyspark:
+    image: jupyter/pyspark-notebook:spark-3.1.2
+    environment:
+      - JUPYTER_ENABLE_LAB=yes
+    ports:
+      - 8889:8888
\ No newline at end of file
diff --git a/pt2-in.txt b/pt2-in.txt
new file mode 100644
index 0000000..589cc05
--- /dev/null
+++ b/pt2-in.txt
@@ -0,0 +1,3 @@
+0 1
+0 2
+2 3
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..fdee051
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+ipykernel==6.4.1
+pyspark==3.1.2
\ No newline at end of file
diff --git a/task2.py b/task2.py
new file mode 100644
index 0000000..f12f945
--- /dev/null
+++ b/task2.py
@@ -0,0 +1,22 @@
+# import findspark
+# findspark.init()
+
+from pyspark import SparkConf, SparkContext
+
+
+def main():
+    """Print up to 10 lines of pt2-in.txt using a local Spark context."""
+    conf = SparkConf().setAppName("local123").setMaster("local[*]")
+
+    # The context manager calls sc.stop() on exit, so the local context is
+    # released even if reading the file raises.
+    with SparkContext(conf=conf) as sc:
+        data = sc.textFile("pt2-in.txt")
+
+        # Read and display the first few lines of the data
+        for line in data.take(10):
+            print(line)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file