Skip to content
This repository has been archived by the owner on Aug 23, 2022. It is now read-only.

Commit

Permalink
Improve data store (#54)
Browse files Browse the repository at this point in the history
* Bump version to 2.11

* Start refactoring SQLAlchemy usage for 1.4/2.0

Start refactoring the code in the datastore to follow SQLAlchemy 2.0
syntax.
Something is up with equality checking, hence __repr__ in test

Co-authored-by: Mattias Springare <[email protected]>

* Add mypy config to gradually roll out types

Add mypy config to gradually roll out types in the code base. The
configured tox environment will not run unless explicitly specified
(with tox -e types).

The mypy config will ignore all files that have not been opted in,
allowing a gradual roll out through the codebase.

* Add types, update __repr__  and improve docs for data_store and models

Add types for data_store and model, as part of an effort to gradually
roll out types in the code base. Related to #56.

Change import structure for sqlalchemy and only import the top level
package, to make the code easier to read and understand where e.g.,
"Column" comes from.

Improve the docstrings by attempting to have a first line summary and
additional description in follow lines. Types were removed from the
docstring as they are part of the function signature.
The docstring is written in 120 character lines, given that the codebase
is using them, but it would be easier to read in terminal usage if they
were 80 char lines.

Update the __repr__ method of models to print only the relevant files
(and remove a bunch of unnecessary code).

* Improve tests, especially around data store

Re-define test utils as pytest fixtures for easier usage.

Generalise pytest fixtures used for the data store and chain the
fixtures so that the initialised data store-fixture depends on the data
store-fixture etc. This will make it easier to do test-related changes
in the objects.

The fixtures can be improved further, there seems to be no clear
consensus if the tests should create/load it's own data or if they
should use the pre-populated fixtures. The current state is working, but
the tests would be easier to read if they relied more on fixtures.

Add docstrings to the data store tests, explaining what the tests do.

Clean up some test code by removing duplicate tests, splitting some
tests that do many things and remove unnecessary assert statements
(e.g., first checking if the object is not None, then if it is a
datetime instance. Only the datetime instance check is needed as it will
then not be None).

Co-authored-by: Mattias Springare <[email protected]>
  • Loading branch information
vikahl and mspringare authored Aug 27, 2021
1 parent 6a16187 commit 6610cd4
Show file tree
Hide file tree
Showing 10 changed files with 554 additions and 486 deletions.
486 changes: 257 additions & 229 deletions comet_core/data_store.py

Large diffs are not rendered by default.

70 changes: 29 additions & 41 deletions comet_core/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,34 +16,16 @@
import json
from datetime import datetime

from sqlalchemy import JSON, Column, DateTime, Integer, String, UnicodeText, types
from sqlalchemy.ext.declarative import declarative_base
import sqlalchemy
import sqlalchemy.orm

BaseRecord = sqlalchemy.orm.declarative_base()

class BaseRecordRepr:
"""
This class can be used by declarative_base, to add an automatic
__repr__ method to *all* subclasses of BaseRecord.
"""

def __repr__(self):
"""Return a representation of this object as a string.
Returns:
str: a representation of the object.
"""
return f"{self.__class__.__name__}: " + " ".join(
[f"{k}={self.__getattribute__(k)}" for k, v in self.__class__.__dict__.items() if hasattr(v, "__set__")]
)


BaseRecord = declarative_base(cls=BaseRecordRepr)


class JSONType(types.TypeDecorator): # pylint: disable=abstract-method
class JSONType(sqlalchemy.types.TypeDecorator): # pylint: disable=abstract-method
"""This is for testing purposes, to make the JSON type work with sqlite."""

impl = UnicodeText
impl = sqlalchemy.UnicodeText

cache_ok = True

Expand All @@ -57,7 +39,7 @@ def load_dialect_impl(self, dialect):
object: if dialect name is 'mysql' it will override the type descriptor to JSON()
"""
if dialect.name == "mysql":
return dialect.type_descriptor(JSON())
return dialect.type_descriptor(sqlalchemy.JSON())
return dialect.type_descriptor(self.impl)

def process_bind_param(self, value, dialect):
Expand Down Expand Up @@ -101,17 +83,17 @@ class EventRecord(BaseRecord):
"""

__tablename__ = "event"
id = Column(Integer, primary_key=True)
source_type = Column(String(250), nullable=False)
fingerprint = Column(String(250))
owner = Column(String(250))
event_metadata = Column(JSONType())
data = Column(JSONType())

received_at = Column(DateTime, default=datetime.utcnow)
sent_at = Column(DateTime, default=None)
escalated_at = Column(DateTime, default=None)
processed_at = Column(DateTime, default=None)
id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
source_type = sqlalchemy.Column(sqlalchemy.String(250), nullable=False)
fingerprint = sqlalchemy.Column(sqlalchemy.String(250))
owner = sqlalchemy.Column(sqlalchemy.String(250))
event_metadata = sqlalchemy.Column(JSONType())
data = sqlalchemy.Column(JSONType())

received_at = sqlalchemy.Column(sqlalchemy.DateTime, default=datetime.utcnow)
sent_at = sqlalchemy.Column(sqlalchemy.DateTime, default=None)
escalated_at = sqlalchemy.Column(sqlalchemy.DateTime, default=None)
processed_at = sqlalchemy.Column(sqlalchemy.DateTime, default=None)

def __init__(self, *args, **kwargs):
self.new = False
Expand All @@ -129,21 +111,27 @@ def update_metadata(self, metadata):
else:
self.event_metadata = metadata

def __repr__(self):
return f"EventRecord(id={self.id!r}, source_type={self.source_type!r}, fingerprint={self.fingerprint!r})"


class IgnoreFingerprintRecord(BaseRecord):
"""Acceptedrisk model."""

__tablename__ = "ignore_fingerprint"
id = Column(Integer, primary_key=True)
fingerprint = Column(String(250))
ignore_type = Column(String(50))
reported_at = Column(DateTime, default=datetime.utcnow)
expires_at = Column(DateTime, default=None)
record_metadata = Column(JSONType())
id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
fingerprint = sqlalchemy.Column(sqlalchemy.String(250))
ignore_type = sqlalchemy.Column(sqlalchemy.String(50))
reported_at = sqlalchemy.Column(sqlalchemy.DateTime, default=datetime.utcnow)
expires_at = sqlalchemy.Column(sqlalchemy.DateTime, default=None)
record_metadata = sqlalchemy.Column(JSONType())

SNOOZE = "snooze"
ACCEPT_RISK = "acceptrisk"
FALSE_POSITIVE = "falsepositive"
ACKNOWLEDGE = "acknowledge"
ESCALATE_MANUALLY = "escalate_manually"
RESOLVED = "resolved"

def __repr__(self):
return f"IgnoreFingerPrintRecord(id={self.id!r}, fingerprint={self.fingerprint!r}, ignore_type={self.ignore_type!r})"
30 changes: 30 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,33 @@ line_length = 120

[tool.black]
line-length = 120

[tool.mypy]
ignore_missing_imports = true
warn_unused_configs = true
disallow_any_generics = true
disallow_subclassing_any = true
# disallow_untyped_calls = true # temp disabled
disallow_untyped_defs = true
disallow_incomplete_defs = true
check_untyped_defs = true
disallow_untyped_decorators = true
no_implicit_optional = true
warn_redundant_casts = true
warn_unused_ignores = true
warn_return_any = true
no_implicit_reexport = true
strict_equality = true
plugins = "sqlmypy"

# Gradually roll out mypy by ignoring all files not explicitly opted in.
[[tool.mypy.overrides]]
module = "comet_core.*"
ignore_errors = true

[[tool.mypy.overrides]]
module = [
"comet_core.data_store",
"comet_core.models"
]
ignore_errors = false
17 changes: 14 additions & 3 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
# Install the package itself in dev mode
-e .

tox==3.24.1

# Formatters and linters
black==21.7b0
pylint==2.9.6
isort==5.8.0

# Testing
pytest==6.2.4
pytest-cov==2.12.0
pytest-freezegun==0.4.2
tox==3.24.1
pylint==2.9.6
isort==5.8.0

# Types
mypy==0.910
sqlalchemy-stubs==0.4
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

setuptools.setup(
name="comet-core",
version="2.10.0",
version="2.11.0",
url="https://github.com/spotify/comet-core",
author="Spotify Platform Security",
author_email="[email protected]",
Expand Down
13 changes: 0 additions & 13 deletions tests/__init__.py

This file was deleted.

60 changes: 53 additions & 7 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,29 +13,75 @@
# limitations under the License.

"""Global test fixtures"""
import os
from datetime import datetime
from typing import List

import pytest

from comet_core import Comet
from comet_core.app import EventContainer
from comet_core.data_store import DataStore
from comet_core.model import EventRecord

# pylint: disable=redefined-outer-name


@pytest.fixture
def app():
def app() -> Comet:
"""Returns a Comet app."""
yield Comet()


@pytest.fixture
def test_db():
def messages() -> List[EventContainer]:
"""Get all test messages and their filenames as an iterator.
Returns:
EventContainer: some test event
"""
event = EventContainer("test", {})
event.set_owner("[email protected]")
event.set_fingerprint("test")
return [event]


@pytest.fixture
def data_store() -> DataStore:
"""Creates a SQLite backed datastore."""
return DataStore("sqlite://")


@pytest.fixture
def test_db(messages, data_store) -> DataStore:
"""Setup a test database fixture
Yields:
DataStore: a sqlite backed datastore with all test data
"""
from comet_core.data_store import DataStore
from tests.utils import get_all_test_messages

data_store = DataStore("sqlite://")
for event in get_all_test_messages(parsed=True):
for event in messages:
data_store.add_record(event.get_record())

yield data_store


@pytest.fixture
def data_store_with_test_events(data_store) -> DataStore:
"""Creates a populated data store."""
one = EventRecord(received_at=datetime(2018, 7, 7, 9, 0, 0), source_type="datastoretest", owner="a", data={})
one.fingerprint = "f1"
two = EventRecord(received_at=datetime(2018, 7, 7, 9, 30, 0), source_type="datastoretest", owner="a", data={})
two.fingerprint = "f2"
three = EventRecord(
received_at=datetime(2018, 7, 7, 9, 0, 0),
source_type="datastoretest2", # Note that this is another source type!
owner="b",
data={},
)
three.fingerprint = "f3"

data_store.add_record(one)
data_store.add_record(two)
data_store.add_record(three)

yield data_store
Loading

0 comments on commit 6610cd4

Please sign in to comment.