Skip to content

Commit

Permalink
Merge pull request #303 from sodascience/develop
Browse files Browse the repository at this point in the history
Release 1.0.2
  • Loading branch information
vankesteren authored Aug 9, 2024
2 parents 6ddfbf8 + deb0f08 commit 4e72bb3
Show file tree
Hide file tree
Showing 10 changed files with 448 additions and 8 deletions.
28 changes: 28 additions & 0 deletions .github/workflows/joss-paper-draft.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Builds a draft PDF of the JOSS paper and uploads it as a workflow artifact.
name: Draft PDF
on:
  push:
    # Only run when the paper sources or this workflow file change.
    paths:
      - docs/paper/**
      - .github/workflows/joss-paper-draft.yml

jobs:
  paper:
    runs-on: ubuntu-latest
    name: Paper Draft
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Build draft PDF
        uses: openjournals/openjournals-draft-action@master
        with:
          journal: joss
          # This should be the path to the paper within your repo.
          paper-path: docs/paper/paper.md
      - name: Upload
        uses: actions/upload-artifact@v4
        with:
          name: paper
          # This is the output path where Pandoc will write the compiled
          # PDF. Note, this should be the same directory as the input
          # paper.md
          path: docs/paper/paper.pdf
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Python package
name: Lint, test, documentation, plugins

on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
branches: [ develop ]
schedule:
- cron: "0 0 * * 0"

Expand Down
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,13 @@ dmypy.json

#DS_Store
.DS_Store
.DS_Store
docs/source/images/pipeline.afphoto~lock~
docs/source/images/.$ClassDiagram.drawio.bkp
docs/source/images/.$distributions.drawio.bkp

# JOSS paper artefacts
docs/paper/*.jats
docs/paper/media

# Generated api docs stuff
docs/source/api/generated
8 changes: 8 additions & 0 deletions docs/paper/build_command.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@

On Windows:

docker run --rm --volume %cd%/docs/paper:/data --env JOURNAL=joss openjournals/inara

On Unix:

docker run --rm --volume $PWD/docs/paper:/data --user $(id -u):$(id -g) --env JOURNAL=joss openjournals/inara
14 changes: 14 additions & 0 deletions docs/paper/img/logo.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
161 changes: 161 additions & 0 deletions docs/paper/paper.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
@misc{bates2019ons,
title={ONS methodology working paper series number 16—Synthetic data pilot},
author={Bates, AG and Spakulov{\'a}, I and Dove, I and Mealor, A},
year={2019}
}

@inproceedings{dwork2006differential,
title={Differential privacy},
author={Dwork, Cynthia},
booktitle={International colloquium on automata, languages, and programming},
pages={1--12},
year={2006},
organization={Springer}
}

@book{dewolf2012statistical,
title={Statistical disclosure control},
author={de Wolf, Peter-Paul},
year={2012},
publisher={Wiley \& Sons, Chichester}
}

@article{sweeney2002k,
title={k-anonymity: A model for protecting privacy},
author={Sweeney, Latanya},
journal={International journal of uncertainty, fuzziness and knowledge-based systems},
volume={10},
number={05},
pages={557--570},
year={2002},
publisher={World Scientific}
}

@misc{bond2015guidelines,
title={Guidelines for Output Checking. Eurostat},
author={Bond, S and Brandt, M and de Wolf, PP},
year={2015}
}

@article{dwork2010differential,
title={Differential privacy for statistics: What we know and what we want to learn},
author={Dwork, Cynthia and Smith, Adam},
journal={Journal of Privacy and Confidentiality},
volume={1},
number={2},
year={2010}
}

@book{hastie2009elements,
title={The elements of statistical learning: data mining, inference, and prediction},
author={Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome H},
volume={2},
year={2009},
publisher={Springer}
}

@inproceedings{akaike1973information,
title={Information theory and an extension of the maximum likelihood principle},
author={Akaike, H},
booktitle={2nd International Symposium on Information Theory},
pages={267--281},
year={1973},
organization={Akad{\'e}miai Kiad{\'o}},
address={Budapest, Hungary}
}

@article{neath2012bayesian,
title={The Bayesian information criterion: background, derivation, and applications},
author={Neath, Andrew A and Cavanaugh, Joseph E},
journal={Wiley Interdisciplinary Reviews: Computational Statistics},
volume={4},
number={2},
pages={199--203},
year={2012},
publisher={Wiley Online Library}
}
@software{vink2024polars,
author = {Ritchie Vink and
Stijn de Gooijer and
Alexander Beedie and
Marco Edward Gorelli and
Weijie Guo and
J van Zundert and
Orson Peters and
Gert Hulselmans and
nameexhaustion and
Cory Grinstead and
Marshall and
Gijs Burghoorn and
chielP and
Itamar Turner-Trauring and
Matteo Santamaria and
Daniël Heres and
Lawrence Mitchell and
Josh Magarick and
ibENPC and
Karl Genockey and
Moritz Wilksch and
Jorge Leitao and
Mick van Gelderen and
Petros Barbagiannis and
Oliver Borchert and
deanm0000 and
Jonas Haag and
Henry Harbeck and
Liam Brannigan},
title = {pola-rs/polars: Python Polars},
year = 2024,
publisher = {Zenodo},
version = {py-1.4.1},
doi = {10.5281/zenodo.7697217},
url = {https://doi.org/10.5281/zenodo.7697217}
}

@article{wickham2014tidy,
title={Tidy Data},
volume={59},
url={https://www.jstatsoft.org/index.php/jss/article/view/v059i10},
doi={10.18637/jss.v059.i10},
abstract={A huge amount of effort is spent cleaning data to get it ready for analysis, but there has been little research on how to make data cleaning as easy and effective as possible. This paper tackles a small, but important, component of data cleaning: data tidying. Tidy datasets are easy to manipulate, model and visualize, and have a specific structure: each variable is a column, each observation is a row, and each type of observational unit is a table. This framework makes it easy to tidy messy datasets because only a small set of tools are needed to deal with a wide range of un-tidy datasets. This structure also makes it easier to develop tidy tools for data analysis, tools that both input and output tidy datasets. The advantages of a consistent data structure and matching tools are demonstrated with a case study free from mundane data manipulation chores.},
number={10},
journal={Journal of Statistical Software},
author={Wickham, Hadley},
year={2014},
pages={1--23}
}

# alternative synthetic data packages
@article{nowok2016synthpop,
title={synthpop: Bespoke creation of synthetic data in R},
author={Nowok, Beata and Raab, Gillian M and Dibben, Chris},
journal={Journal of statistical software},
volume={74},
pages={1--26},
year={2016}
}

@article{templ2017simulation,
title={Simulation of synthetic complex data: The R package simPop},
author={Templ, Matthias and Meindl, Bernhard and Kowarik, Alexander and Dupriez, Olivier},
journal={Journal of Statistical Software},
volume={79},
number={10},
pages={1--38},
year={2017},
publisher={UCLA, Dept. of Statistics}
}

@inproceedings{ping2017datasynthesizer,
title={Datasynthesizer: Privacy-preserving synthetic datasets},
author={Ping, Haoyue and Stoyanovich, Julia and Howe, Bill},
booktitle={Proceedings of the 29th International Conference on Scientific and Statistical Database Management},
pages={1--5},
year={2017}
}

@article{vankesteren2024democratize,
title={To democratize research with sensitive data, we should make synthetic data more accessible},
author={{van Kesteren}, Erik-Jan},
journal={arXiv preprint arXiv:2404.17271},
year={2024}
}
Loading

0 comments on commit 4e72bb3

Please sign in to comment.