diff --git a/README.md b/README.md
index 36fc773..902d355 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,4 @@
-Observatoire des imaginaires
-================
-
+# Observatoire des imaginaires
 
 ## Installing with poetry
 
@@ -65,7 +63,7 @@ pip install poetry
    jupyter notebook
    ```
 
-## Download datasets from kaggle 
+## Download datasets from kaggle
 
 If you want to use kaggle to download datasets, please make sure to have api's credentials in ~/.kaggle/kaggle.json.
 
@@ -78,8 +76,8 @@ make download-tmdb-movies-dataset
 make download-full-tmdb-tv-shows-dataset
 ```
 
-
 Alternatively you can download directly the datasets from kaggle website :
+
 - [tmdb-movies-dataset](https://www.kaggle.com/datasets/asaniczka/tmdb-movies-dataset-2023-930k-movies)
 - [full-tmdb-tv-shows-dataset](https://www.kaggle.com/datasets/asaniczka/full-tmdb-tv-shows-dataset-2023-150k-shows)
 
@@ -87,15 +85,17 @@ Alternatively you can download directly the datasets from kaggle website :
 
 The [site-observable](https://github.com/dataforgoodfr/12_observatoire_des_imaginaires/tree/main/site-observable) directory contains
 an observable framework site that collect film and movie data from the above datasets on kaggle and filters the datasets according
-to the following rules in order to reduced the size of the data present on the generated web site.  This site provides a search UI
-allow a user to select a specific movie or TV show.  The user can then click on the link for their selection to kick off the
+to the following rules in order to reduce the size of the data present on the generated web site. This site provides a search UI
+that allows a user to select a specific movie or TV show. The user can then click on the link for their selection to kick off the
 questionnaire on tally andis destined to be embedded in an iframe in the main Observatoire des Imaginaires web site.
 
 Movies:
+
 - filter out adult movies
 - filter out movies released more that two years ago
 
 TV Shows:
+
 - filter out adult shows
 
 The web site is currently hosted on the [Observable hosting platform](https://observablehq.com/) and is available at the following URL:
@@ -106,10 +106,15 @@ https://observatoire-des-imaginaires.observablehq.cloud/questionnaire
 
 [Install precommits](https://pre-commit.com/)
 
-
-    pre-commit run --all-files 
- 
+    pre-commit run --all-files
 
 ## Use Tox to test your code
 
     tox -vv
+
+## Tasks
+
+This repo includes invoke for pythonic task execution. To see the
+list of available tasks you can run:
+
+    invoke -l
diff --git a/poetry.lock b/poetry.lock
index fd4a6b3..7cf0f4b 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
 
 [[package]]
 name = "anyio"
@@ -642,6 +642,17 @@ files = [
     {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
 ]
 
+[[package]]
+name = "invoke"
+version = "2.2.0"
+description = "Pythonic task execution"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "invoke-2.2.0-py3-none-any.whl", hash = "sha256:6ea924cc53d4f78e3d98bc436b08069a03077e6f85ad1ddaa8a116d7dad15820"},
+    {file = "invoke-2.2.0.tar.gz", hash = "sha256:ee6cbb101af1a859c7fe84f2a264c059020b0cb7fe3535f9424300ab568f6bd5"},
+]
+
 [[package]]
 name = "ipykernel"
 version = "5.5.6"
@@ -1788,13 +1799,6 @@ files = [
     {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
     {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
     {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
-    {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
-    {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
-    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
-    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
-    {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
-    {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
-    {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
     {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
     {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
     {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
@@ -2273,28 +2277,6 @@ webencodings = ">=0.4"
 doc = ["sphinx", "sphinx_rtd_theme"]
 test = ["flake8", "isort", "pytest"]
 
-[[package]]
-name = "six"
-version = "1.16.0"
-description = "Python 2 and 3 compatibility utilities"
-optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
-files = [
-    {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
-    {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
-]
-
-[[package]]
-name = "text-unidecode"
-version = "1.3"
-description = "The most basic Text::Unidecode port"
-optional = false
-python-versions = "*"
-files = [
-    {file = "text-unidecode-1.3.tar.gz", hash = "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93"},
-    {file = "text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8"},
-]
-
 [[package]]
 name = "tomli"
 version = "2.0.1"
@@ -2528,4 +2510,4 @@ files = [
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "eeb9ba8f43237f51301a5ec4c62d79a177ae19b81bbf920c0947d53ccca6ec00"
+content-hash = "4f39c5e267d633fab26b42f0bffa6f1928ae0d8b3c3230112411a703c4762260"
diff --git a/pyproject.toml b/pyproject.toml
index 57841a9..dd1755c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,6 +23,7 @@ pre-commit = "^2.20.0"
 pytest = "^7.2.0"
 # ruff = "^0.0.254"
 tox = "^4.4.8"
+invoke = "^2.2.0"
 
 [tool.ruff]
 select = [
diff --git a/site-observable/docs/data/films.sqlite.py b/site-observable/docs/data/films.sqlite.py
index c770651..0b747e2 100755
--- a/site-observable/docs/data/films.sqlite.py
+++ b/site-observable/docs/data/films.sqlite.py
@@ -1,7 +1,7 @@
 import os
 import sqlite3
 import tempfile
-from datetime import datetime, timedelta
+from datetime import datetime
 
 import pandas as pd
 
@@ -18,12 +18,15 @@
     # Remove adult movies
     df = df[df["adult"] == False]  # noqa: E712
 
-    # Calculate the date for the past two years
-    years_ago = datetime.now() - timedelta(days=365 * 2)
-    start_date = years_ago.replace(month=1, day=1)
+    # Remove documentaries
+    df = df[df["genres"].str.contains("Documentary") == False]  # noqa: E712
 
-    # Filter the dataframe based on the start date
-    df = df[df["release_date"] >= start_date]
+    # Remove movies with a future release date
+    now = datetime.now()
+    df = df[df["release_date"] < now]
+
+    # Remove movies with no known revenue
+    df = df[df["revenue"] > 0]
 
     # Add a column with the production_year based on the release_date
     df["production_year"] = df["release_date"].dt.year
diff --git a/site-observable/docs/data/shows.sqlite.py b/site-observable/docs/data/shows.sqlite.py
index e8211d7..2cfde7a 100755
--- a/site-observable/docs/data/shows.sqlite.py
+++ b/site-observable/docs/data/shows.sqlite.py
@@ -1,6 +1,7 @@
 import os
 import sqlite3
 import tempfile
+from datetime import datetime
 
 import pandas as pd
 
@@ -12,11 +13,18 @@
     )
     os.system("unzip full-tmdb-tv-shows-dataset-2023-150k-shows.zip >&2")
 
-    df = pd.read_csv("TMDB_tv_dataset_v3.csv")
+    df = pd.read_csv("TMDB_tv_dataset_v3.csv", parse_dates=["first_air_date"])
 
     # Remove adult movies
     df = df[df["adult"] == False]  # noqa: E712
 
+    # Remove documentaries
+    df = df[df["genres"].str.contains("Documentary") == False]  # noqa: E712
+
+    # Remove shows with a future first air date or no first air date
+    now = datetime.now()
+    df = df[df["first_air_date"] < now]
+
     # Select the columns we want
     df = df[["id", "name", "original_name", "poster_path"]]
 
diff --git a/tasks.py b/tasks.py
new file mode 100644
index 0000000..f44ff1e
--- /dev/null
+++ b/tasks.py
@@ -0,0 +1,27 @@
+import shlex
+
+from invoke import Context, task
+
+
+@task
+def clean_branches(c: Context) -> None:
+    """Delete local branches already merged into the current branch.
+
+    The current branch and any branch whose name contains "main" are kept.
+    Filtering is done in Python rather than through a grep/xargs pipeline so
+    that the task is a no-op instead of an error when nothing needs deleting
+    (GNU xargs runs its command once even on empty input, which makes
+    "git branch -d" fail with "branch name required").
+    """
+    merged = c.run("git branch --merged", hide="out")
+    to_delete = []
+    for line in merged.stdout.splitlines():
+        name = line.strip()
+        # Skip blanks, the current branch ("* name"), branches checked out in
+        # another worktree ("+ name", which git refuses to delete) and main.
+        if not name or "*" in name or name.startswith("+") or "main" in name:
+            continue
+        to_delete.append(shlex.quote(name))
+    if to_delete:
+        # One invocation for all branches instead of one process per branch.
+        c.run("git branch -d " + " ".join(to_delete))