Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨(edx) add mongodb connection to remove personal data from comments #41

Draft
wants to merge 2 commits into
base: rename-tasks
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 21 additions & 9 deletions .env.dist
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ MORK_WARNING_PERIOD=P3Y30D
MORK_DELETION_PERIOD=P3Y
MORK_DELETE_MAX_RETRIES=3

# Edx forum configuration
MORK_EDX_FORUM_PLACEHOLDER_USER_ID=1234

# Mork database
MORK_DB_ENGINE=postgresql+psycopg2
MORK_DB_HOST=postgresql
Expand All @@ -21,15 +24,24 @@ MORK_DB_PORT=5432
MORK_DB_DEBUG=False
MORK_TEST_DB_NAME=test-mork-db

# Edx database
MORK_EDX_DB_ENGINE=mysql+pymysql
MORK_EDX_DB_HOST=mysql
MORK_EDX_DB_NAME=edxapp
MORK_EDX_DB_USER=edxapp
MORK_EDX_DB_PASSWORD=password
MORK_EDX_DB_PORT=3306
MORK_EDX_DB_DEBUG=False
MORK_EDX_QUERY_BATCH_SIZE=1000
# Edx MySQL database
MORK_EDX_MYSQL_DB_ENGINE=mysql+pymysql
MORK_EDX_MYSQL_DB_HOST=mysql
MORK_EDX_MYSQL_DB_NAME=edxapp
MORK_EDX_MYSQL_DB_USER=edxapp
MORK_EDX_MYSQL_DB_PASSWORD=password
MORK_EDX_MYSQL_DB_PORT=3306
MORK_EDX_MYSQL_DB_DEBUG=False
MORK_EDX_MYSQL_QUERY_BATCH_SIZE=1000

# Edx MongoDB database
MORK_EDX_MONGO_DB_ENGINE=mongodb
MORK_EDX_MONGO_DB_HOST=mongo
MORK_EDX_MONGO_DB_NAME=cs_comments_service
MORK_EDX_MONGO_DB_USER=
MORK_EDX_MONGO_DB_PASSWORD=
MORK_EDX_MONGO_DB_PORT=27017
MORK_EDX_MONGO_DB_DEBUG=False

# Redis configuration
REDIS_HOST=localhost
Expand Down
18 changes: 12 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@ COMPOSE_RUN_API = $(COMPOSE_RUN) api
COMPOSE_RUN_MAIL = $(COMPOSE_RUN) mail-generator

# -- MySQL
EDX_DB_HOST = mysql
EDX_DB_PORT = 3306
EDX_MYSQL_DB_HOST = mysql
EDX_MYSQL_DB_PORT = 3306

# -- MongoDB
EDX_MONGO_DB_HOST = mongo
EDX_MONGO_DB_PORT = 27017

# -- Postgresql
DB_HOST = postgresql
Expand Down Expand Up @@ -52,7 +56,7 @@ bootstrap: \
build \
run \
migrate \
seed-edx-database \
seed-edx-databases \
mails-install \
mails-build
.PHONY: bootstrap
Expand Down Expand Up @@ -116,12 +120,14 @@ stop: ## stop all servers
@$(COMPOSE) stop
.PHONY: stop

seed-edx-database: ## seed the edx database with test data
seed-edx-databases: ## seed the edx MySQL and MongoDB databases with test data
@echo "Waiting for mysql to be up and running…"
@$(COMPOSE_RUN) dockerize -wait tcp://$(EDX_DB_HOST):$(EDX_DB_PORT) -timeout 60s
@$(COMPOSE_RUN) dockerize -wait tcp://$(EDX_MYSQL_DB_HOST):$(EDX_MYSQL_DB_PORT) -timeout 60s
@echo "Waiting for mongodb to be up and running…"
@$(COMPOSE_RUN) dockerize -wait tcp://$(EDX_MONGO_DB_HOST):$(EDX_MONGO_DB_PORT) -timeout 60s
@echo "Seeding the edx database…"
@$(COMPOSE) exec -T celery python /opt/src/seed_edx_database.py
.PHONY: seed-edx-database
.PHONY: seed-edx-databases

# -- Provisioning
create-test-db: ## create test database
Expand Down
28 changes: 23 additions & 5 deletions bin/seed_edx_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,17 @@
from sqlalchemy import create_engine
from sqlalchemy.orm import Session

from mongoengine import connect, disconnect

from mork.edx.mongo.factories import CommentFactory, CommentThreadFactory
from mork.conf import settings
from mork.edx.factories.auth import EdxAuthUserFactory
from mork.edx.models.base import Base
from mork.edx.mysql.factories.auth import EdxAuthUserFactory
from mork.edx.mysql.models.base import Base


async def seed_edx_database():
async def seed_edx_mysql_database():
"""Seed the MySQL edx database with mocked data."""
engine = create_engine(settings.EDX_DB_URL)
engine = create_engine(settings.EDX_MYSQL_DB_URL)
session = Session(engine)
EdxAuthUserFactory._meta.sqlalchemy_session = session # noqa: SLF001
EdxAuthUserFactory._meta.sqlalchemy_session_persistence = "commit" # noqa: SLF001
Expand All @@ -21,5 +24,20 @@ async def seed_edx_database():
EdxAuthUserFactory.create_batch(1000)


async def seed_edx_mongodb_database():
    """Seed the MongoDB edx database with mocked data.

    Opens a mongoengine connection to the configured edx MongoDB host and
    database, creates 1000 comments and 1000 comment threads through their
    factories, and closes the connection afterwards.
    """
    # No alias is given, so the connection is registered under mongoengine's
    # default alias.
    connect(host=settings.EDX_MONGO_DB_HOST, db=settings.EDX_MONGO_DB_NAME)

    try:
        CommentFactory.create_batch(1000)
        CommentThreadFactory.create_batch(1000)
    finally:
        # Disconnect the same (default) alias that connect() registered.
        # Disconnecting an alias that was never registered is a silent no-op
        # and would leave the connection open.
        disconnect()


async def main():
    """Seed both edx databases by gathering the two seeding coroutines."""
    await asyncio.gather(
        seed_edx_mysql_database(),
        seed_edx_mongodb_database(),
    )


if __name__ == "__main__":
asyncio.run(seed_edx_database())
asyncio.run(main())
8 changes: 8 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ services:
- api
- redis
- mailcatcher
- mongo
- mysql
- postgresql

Expand Down Expand Up @@ -76,3 +77,10 @@ services:
env_file:
- .env
command: mysqld --character-set-server=utf8 --collation-server=utf8_general_ci

mongo:
image: mongo:3.0.15
ports:
- "27017:27017"
env_file:
- .env
1 change: 1 addition & 0 deletions renovate.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"commitMessageAction": "upgrade",
"commitBodyTable": true,
"ignoreDeps": [
"pymongo"
],
"dependencyDashboard": true
}
12 changes: 6 additions & 6 deletions src/app/mork/celery/tasks/deletion.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from mork.celery.celery_app import app
from mork.conf import settings
from mork.database import MorkDB
from mork.edx import crud
from mork.edx.database import OpenEdxDB
from mork.edx.mysql import crud
from mork.edx.mysql.database import OpenEdxMySQLDB
from mork.exceptions import UserDeleteError
from mork.models import EmailStatus

Expand All @@ -20,16 +20,16 @@
@app.task
def delete_inactive_users():
"""Celery task to delete inactive users accounts."""
db = OpenEdxDB()
db = OpenEdxMySQLDB()
threshold_date = datetime.now() - settings.DELETION_PERIOD

total = crud.get_inactive_users_count(db.session, threshold_date)
for batch_offset in range(0, total, settings.EDX_QUERY_BATCH_SIZE):
for batch_offset in range(0, total, settings.EDX_MYSQL_QUERY_BATCH_SIZE):
inactive_users = crud.get_inactive_users(
db.session,
threshold_date,
offset=batch_offset,
limit=settings.EDX_QUERY_BATCH_SIZE,
limit=settings.EDX_MYSQL_QUERY_BATCH_SIZE,
)
delete_group = group([delete_user.s(user.email) for user in inactive_users])
delete_group.delay()
Expand All @@ -53,7 +53,7 @@ def delete_user(self, email: str):

def delete_user_from_db(email):
"""Delete user from edX database."""
db = OpenEdxDB()
db = OpenEdxMySQLDB()

# Delete user from edX database
crud.delete_user(db.session, email=email)
Expand Down
10 changes: 5 additions & 5 deletions src/app/mork/celery/tasks/emailing.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from mork.celery.celery_app import app
from mork.conf import settings
from mork.database import MorkDB
from mork.edx import crud
from mork.edx.database import OpenEdxDB
from mork.edx.mysql import crud
from mork.edx.mysql.database import OpenEdxMySQLDB
from mork.exceptions import EmailAlreadySent, EmailSendError
from mork.mail import send_email
from mork.models import EmailStatus
Expand All @@ -21,17 +21,17 @@
@app.task
def warn_inactive_users():
"""Celery task to warn inactive users by email."""
db = OpenEdxDB()
db = OpenEdxMySQLDB()

threshold_date = datetime.now() - settings.WARNING_PERIOD

total = crud.get_inactive_users_count(db.session, threshold_date)
for batch_offset in range(0, total, settings.EDX_QUERY_BATCH_SIZE):
for batch_offset in range(0, total, settings.EDX_MYSQL_QUERY_BATCH_SIZE):
inactive_users = crud.get_inactive_users(
db.session,
threshold_date,
offset=batch_offset,
limit=settings.EDX_QUERY_BATCH_SIZE,
limit=settings.EDX_MYSQL_QUERY_BATCH_SIZE,
)
send_email_group = group(
[warn_user.s(user.email, user.username) for user in inactive_users]
Expand Down
47 changes: 34 additions & 13 deletions src/app/mork/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ class Settings(BaseSettings):
DELETION_PERIOD: timedelta = "P3Y"
DELETE_MAX_RETRIES: int = 3

# Edx forum configuration
EDX_FORUM_PLACEHOLDER_USER_ID: int = 1234

# API Root path
# (used at least by everything that is alembic-configuration-related)
ROOT_PATH: Path = Path(__file__).parent
Expand All @@ -54,15 +57,24 @@ class Settings(BaseSettings):
DB_DEBUG: bool = False
TEST_DB_NAME: str = "test-mork-db"

# EDX database
EDX_DB_ENGINE: str = "mysql+pymysql"
EDX_DB_HOST: str = "mysql"
EDX_DB_NAME: str = "edxapp"
EDX_DB_USER: str = "edxapp"
EDX_DB_PASSWORD: str = "password"
EDX_DB_PORT: int = 3306
EDX_DB_DEBUG: bool = False
EDX_QUERY_BATCH_SIZE: int = 1000
# EDX MySQL database
EDX_MYSQL_DB_ENGINE: str = "mysql+pymysql"
EDX_MYSQL_DB_HOST: str = "mysql"
EDX_MYSQL_DB_NAME: str = "edxapp"
EDX_MYSQL_DB_USER: str = "edxapp"
EDX_MYSQL_DB_PASSWORD: str = "password"
EDX_MYSQL_DB_PORT: int = 3306
EDX_MYSQL_DB_DEBUG: bool = False
EDX_MYSQL_QUERY_BATCH_SIZE: int = 1000

# EDX MongoDB database
EDX_MONGO_DB_ENGINE: str = "mongodb"
EDX_MONGO_DB_HOST: str = "mongo"
EDX_MONGO_DB_NAME: str = "cs_comments_service"
EDX_MONGO_DB_USER: str = "cs_comments_service"
EDX_MONGO_DB_PASSWORD: str = "password"
EDX_MONGO_DB_PORT: int = 27017
EDX_MONGO_DB_DEBUG: bool = False

# Redis configuration
REDIS_HOST: str = "localhost"
Expand Down Expand Up @@ -116,12 +128,21 @@ def TEST_DB_URL(self) -> str:
)

@property
def EDX_DB_URL(self) -> str:
def EDX_MYSQL_DB_URL(self) -> str:
"""Get the edx MySQL database URL as required by SQLAlchemy."""
return (
f"{self.EDX_MYSQL_DB_ENGINE}://"
f"{self.EDX_MYSQL_DB_USER}:{self.EDX_MYSQL_DB_PASSWORD}@"
f"{self.EDX_MYSQL_DB_HOST}/{self.EDX_MYSQL_DB_NAME}"
)

@property
def EDX_MONGO_DB_URL(self) -> str:
"""Get the edx database URL as required by SQLAlchemy."""
return (
f"{self.EDX_DB_ENGINE}://"
f"{self.EDX_DB_USER}:{self.EDX_DB_PASSWORD}@"
f"{self.EDX_DB_HOST}/{self.EDX_DB_NAME}"
f"{self.EDX_MONGO_DB_ENGINE}://"
f"{self.EDX_MONGO_DB_USER}:{self.EDX_MONGO_DB_PASSWORD}@"
f"{self.EDX_MONGO_DB_HOST}/{self.EDX_MONGO_DB_NAME}"
)

@property
Expand Down
1 change: 1 addition & 0 deletions src/app/mork/edx/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# noqa: D104
1 change: 0 additions & 1 deletion src/app/mork/edx/factories/__init__.py

This file was deleted.

1 change: 0 additions & 1 deletion src/app/mork/edx/models/__init__.py

This file was deleted.

1 change: 1 addition & 0 deletions src/app/mork/edx/mongo/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# noqa: D104
40 changes: 40 additions & 0 deletions src/app/mork/edx/mongo/crud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""Module for MongoDB CRUD functions."""

from logging import getLogger

from mongoengine.queryset.visitor import Q

from mork.conf import settings
from mork.edx.mongo.models import Comment, CommentThread

logger = getLogger(__name__)


def anonymize_comments(username: str) -> None:
    """Replace a user's forum comments and threads with placeholder content.

    Every `Comment` and `CommentThread` document authored by `username` is
    updated in bulk: its text fields become "[deleted]", its author id is set
    to `settings.EDX_FORUM_PLACEHOLDER_USER_ID`, and it is flagged anonymous.

    Parameters:
        username (str): The username whose comments and threads are anonymized.
    """
    placeholder = "[deleted]"
    # Fields overwritten on both comments and comment threads.
    shared_fields = {
        "author_username": placeholder,
        "body": placeholder,
        "author_id": settings.EDX_FORUM_PLACEHOLDER_USER_ID,
        "anonymous": True,
    }

    authored_comments = Q(type="Comment") & Q(author_username=username)
    Comment.objects(authored_comments).all().update(**shared_fields)

    authored_threads = Q(type="CommentThread") & Q(author_username=username)
    # Threads additionally have their title overwritten.
    CommentThread.objects(authored_threads).all().update(
        title=placeholder, **shared_fields
    )

    logger.info(f"Anonymised user {username} comments and threads")
18 changes: 18 additions & 0 deletions src/app/mork/edx/mongo/database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Mork edx MongoDB database connection."""

from pymongo import MongoClient

# from pymongo.collection import Collection
from mork.conf import settings


class OpenEdxMongoDB:
    """Connection wrapper around the Open edX MongoDB database.

    Instantiating the class creates a `MongoClient` from the configured URL
    and exposes the client, the configured database and its "contents"
    collection as instance attributes.
    """

    # Class-level placeholder; never assigned by this class.
    session = None

    def __init__(self):
        """Create the MongoDB client and resolve the database and collection."""
        mongo_client = MongoClient(settings.EDX_MONGO_DB_URL)
        edx_database = mongo_client[settings.EDX_MONGO_DB_NAME]

        self.client = mongo_client
        self.database = edx_database
        self.collection = edx_database["contents"]
Loading