This commit is contained in:
2025-12-17 20:13:39 -05:00
parent 0819fe05b7
commit bf1d2e22f5
6 changed files with 87 additions and 0 deletions
+14
View File
@@ -0,0 +1,14 @@
# Dev-container image: a Python base image with the project's pip
# requirements and an OpenJDK JDK installed on top.

# [Choice] Python version: 3, 3.9, 3.8, 3.7, 3.6
ARG PYTHON_VARIANT=3.9
FROM python:${PYTHON_VARIANT}

# Install python dependencies
COPY requirements.txt /tmp/pip-tmp/
RUN pip3 --disable-pip-version-check --no-cache-dir \
    install -r /tmp/pip-tmp/requirements.txt && rm -rf /tmp/pip-tmp

# Install OpenJDK
# [Choice] OpenJDK major version. It must be available as an
# openjdk-<N>-jdk package in the base image's distribution.
# NOTE(review): requirements.txt pins pyspark 3.1.2, whose documentation
# lists Java 8/11 as supported -- confirm that JDK 21 is intended.
ARG OPENJDK_VARIANT=21
# update + install + list cleanup stay in one layer so the apt package
# index is never stored in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends "openjdk-${OPENJDK_VARIANT}-jdk" && \
    rm -rf /var/lib/apt/lists/*
+32
View File
@@ -0,0 +1,32 @@
{
  "name": "Python 3 & Apache Spark",
  "dockerComposeFile": "docker-compose.yml",
  "service": "app",
  "workspaceFolder": "/workspace",
  "customizations": {
    "vscode": {
      "settings": {
        "terminal.integrated.profiles.linux": {
          "bash": {
            "path": "/bin/bash",
            "icon": "terminal-bash"
          }
        },
        "python.defaultInterpreterPath": "/usr/local/bin/python",
        "python.languageServer": "Pylance",
        "python.linting.enabled": true,
        "python.linting.pylintEnabled": true,
        "python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8",
        "python.formatting.blackPath": "/usr/local/py-utils/bin/black",
        "python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf",
        "python.linting.banditPath": "/usr/local/py-utils/bin/bandit",
        "python.linting.flake8Path": "/usr/local/py-utils/bin/flake8",
        "python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
        "python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
        "python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",
        "python.linting.pylintPath": "/usr/local/py-utils/bin/pylint",
        "python.testing.pytestPath": "/usr/local/py-utils/bin/pytest"
      },
      "extensions": [
        "ms-python.python",
        "ms-python.vscode-pylance"
      ]
    }
  }
}
+20
View File
@@ -0,0 +1,20 @@
# Dev-container services:
#   app     - built from .devcontainer/Dockerfile, repository mounted at /workspace
#   pyspark - Jupyter PySpark notebook image, published on host port 8889
version: '3'
services:
  app:
    build:
      context: ..
      dockerfile: .devcontainer/Dockerfile
      args:
        # Quoted so YAML keeps versions as strings; an unquoted 3.10 would
        # be parsed as the float 3.1.
        PYTHON_VARIANT: "3.9"
        # OPENJDK_VARIANT is the JDK build argument the Dockerfile declares;
        # 21 matches the JDK package that the Dockerfile installs.
        OPENJDK_VARIANT: "21"
    volumes:
      - ..:/workspace:cached
    # Keep the container running so the editor can attach to it.
    command: sleep infinity
  pyspark:
    image: jupyter/pyspark-notebook:spark-3.1.2
    environment:
      - JUPYTER_ENABLE_LAB=yes
    ports:
      # host:container, quoted so the mapping is always read as a string.
      - "8889:8888"
+3
View File
@@ -0,0 +1,3 @@
0 1
0 2
2 3
+2
View File
@@ -0,0 +1,2 @@
# Python packages installed into the dev-container image by the Dockerfile's
# `pip3 install -r requirements.txt` step. Versions are pinned exactly.
ipykernel==6.4.1
# Same Spark version as the jupyter/pyspark-notebook:spark-3.1.2 image that
# docker-compose.yml uses for the `pyspark` service.
pyspark==3.1.2
+16
View File
@@ -0,0 +1,16 @@
# import findspark
# findspark.init()
from pyspark import SparkConf, SparkContext
def main(input_path="pt2-in.txt", num_lines=10):
    """Print the first lines of a text file read through a local SparkContext.

    Args:
        input_path: Path passed to ``SparkContext.textFile``. Defaults to
            ``"pt2-in.txt"``.
        num_lines: Maximum number of lines to fetch and print. Defaults to 10.
    """
    conf = SparkConf().setAppName("local123").setMaster("local[*]")
    sc = SparkContext(conf=conf)
    try:
        data = sc.textFile(input_path)

        # Read and display the first few lines of the data
        for line in data.take(num_lines):
            print(line)
    finally:
        # Stop the context even if reading fails, so it is not left running.
        sc.stop()


# Run the job only when executed as a script, not when imported.
if __name__ == "__main__":
    main()