From ad677a024f9f6386adca96290ffb68cf71404324 Mon Sep 17 00:00:00 2001 From: Aleksander Kantak Date: Wed, 24 Apr 2024 18:45:54 +0200 Subject: [PATCH] Add params + better functionalities/files split --- .pre-commit-config.yaml | 2 +- pyproject.toml | 2 +- src/etrade_tax_poland/__main__.py | 22 +--- src/etrade_tax_poland/args.py | 18 +++ src/etrade_tax_poland/common.py | 131 +------------------ src/etrade_tax_poland/dividends.py | 20 +-- src/etrade_tax_poland/files_handling.py | 51 ++++++++ src/etrade_tax_poland/nbp.py | 83 ++++++++++++ src/etrade_tax_poland/stocks.py | 22 ++-- src/etrade_tax_poland/util_fill_nbp_cache.py | 2 +- 10 files changed, 186 insertions(+), 167 deletions(-) create mode 100644 src/etrade_tax_poland/args.py create mode 100644 src/etrade_tax_poland/files_handling.py create mode 100644 src/etrade_tax_poland/nbp.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9b64cd0..7863103 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,7 +5,7 @@ repos: - id: end-of-file-fixer - id: trailing-whitespace - repo: https://github.com/psf/black - rev: 24.4.0 + rev: 24.4.1 hooks: - id: black - repo: https://github.com/PyCQA/isort diff --git a/pyproject.toml b/pyproject.toml index a106a59..e459d46 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "etrade_tax_poland" -version = "0.0.8" +version = "0.0.9" authors = [ { name="Aleksander Kantak", email="aleksander.kantak@intel.com" }, ] diff --git a/src/etrade_tax_poland/__main__.py b/src/etrade_tax_poland/__main__.py index ede577e..7fda31a 100644 --- a/src/etrade_tax_poland/__main__.py +++ b/src/etrade_tax_poland/__main__.py @@ -1,27 +1,19 @@ """Read all E*Trade files and parse.""" -import os -import sys - -from .common import merge_csvs +from .args import parse_args from .dividends import process_dividend_docs +from .files_handling import merge_csvs from .stocks import process_stock_docs -def parse_all_docs(): +def parse_all_docs(dir_path): """Figure out directory and run all functions on it.""" - dir_path = "." - if len(sys.argv) > 1: - dir_path = sys.argv[1] - if not os.path.isdir(dir_path): - print("Provided path is not a directory") - sys.exit(1) - dir_path = os.path.abspath(dir_path) process_dividend_docs(dir_path) process_stock_docs(dir_path) if __name__ == "__main__": - - parse_all_docs() - merge_csvs() + args = parse_args() + parse_all_docs(args.dirpath) + if not args.no_xlsx: + merge_csvs() diff --git a/src/etrade_tax_poland/args.py b/src/etrade_tax_poland/args.py new file mode 100644 index 0000000..3c22a67 --- /dev/null +++ b/src/etrade_tax_poland/args.py @@ -0,0 +1,18 @@ +"""Parse CLI arguments.""" + +import argparse +import os +import sys + + +def parse_args(): + """Parse CLI arguments.""" + parser = argparse.ArgumentParser() + parser.add_argument("dirpath", nargs="?", default=".", help="Get statements path") + parser.add_argument("-x", "--no-xlsx", action="store_true") + args = parser.parse_args() + if not os.path.isdir(args.dirpath): + print("Provided path is not a directory") + sys.exit(1) + args.dirpath = os.path.abspath(args.dirpath) + return args diff --git a/src/etrade_tax_poland/common.py b/src/etrade_tax_poland/common.py index fe5d380..10a82ee 100644 --- a/src/etrade_tax_poland/common.py +++ b/src/etrade_tax_poland/common.py @@ -1,132 +1,3 @@ -"""Implement common functions for etrade documents processing.""" - -import datetime -import glob -import json -import os -from time import sleep - -import pyexcel.cookbook as xlsck -import requests -from pypdf import PdfReader +"""Common variables and functions.""" TAX_PL = 0.19 - - -def pdfs_in_dir(directory): - """Get all PDF statements files.""" - os.chdir(directory) - return glob.glob("*.pdf") - - -def file_to_text(filename): - """Parse PDF file to text only.""" - reader = PdfReader(filename) - text = "" - for page in reader.pages: - text += page.extract_text() + "\n" - return text - - -def merge_csvs(): - """Merge csvs into xlsx and remove them.""" - files_list = glob.glob("*.csv") - xlsck.merge_all_to_a_book(files_list, "etrade.xlsx") - for file_name in files_list: - os.remove(file_name) - - -class NbpRatiosCache: - """Cache data instead of always requesting from NBP.""" - - date_format = "%Y-%m-%d" - nbp_url = "https://api.nbp.pl/api/exchangerates/rates/a/usd/{}/?format=json" - - def __init__(self): - """Initialize objects and fields.""" - self.cache_file = f"{os.path.dirname(os.path.abspath(__file__))}/nbp_cache.json" - self.read_cache() - - def read_cache(self): - """Read cache file.""" - if not os.path.isfile(self.cache_file): - self.cache = {"_": ""} - return - with open(self.cache_file, "r", encoding="utf-8") as file: - self.cache = json.load(file) - - def write_cache(self): - """Write cache file.""" - json_dump_params = { - "sort_keys": True, - "indent": 2, - "separators": (",", ": "), - } - with open(self.cache_file, "w", encoding="utf-8") as file: - json.dump(self.cache, file, **json_dump_params) - file.write("\n") - - def get_ratio(self, date_obj): - """Get ratio from cache if available, otherwise request from NBP.""" - key = date_obj.strftime(self.date_format) - if key in self.cache: - if not self.cache[key]: - raise ValueError("Ratio for selected date is not available in NBP") - return self.cache[key] - - while True: - try: - current_url = self.nbp_url.format(date_obj.strftime(self.date_format)) - req = requests.get(current_url, timeout=5) - except (requests.exceptions.ProxyError, requests.exceptions.ConnectTimeout): - print("Timeout 5s when getting USD/PLN ratio, retrying after 1 second") - sleep(1) - continue - if req.status_code == 200: - self.cache[key] = req.json()["rates"][0]["mid"] - self.write_cache() - return self.cache[key] - if req.status_code == 404: - self.cache[key] = "" - self.write_cache() - raise ValueError("Ratio for selected date is not available in NBP") - print(f"{req.status_code} {req.text}") - print("Unhandled error when getting USD/PLN ratio, retrying after 1 second") - sleep(1) - continue - - -NBP_CACHE = NbpRatiosCache() - - -def date_to_usd_pln(date_obj): - """Find 'day before vestment' USD/PLN ratio.""" - while True: - date_obj -= datetime.timedelta(days=1) - try: - ratio = NBP_CACHE.get_ratio(date_obj) - break - except ValueError: - date_obj -= datetime.timedelta(days=1) - return (date_obj, ratio) - - -def save_csv(filename, header, lines): - """Save header and lines to a csv file.""" - if not lines: - return - with open(filename, "w", encoding="utf-8") as file: - file.write(f"{header}\n") - for line in lines: - file.write(f"{line}\n") - - -def sum_header(): - """Return sum csv file header.""" - return ",".join( - [ - "NAME", - "VALUE", - "PIT_FIELD", - ] - ) diff --git a/src/etrade_tax_poland/dividends.py b/src/etrade_tax_poland/dividends.py index 06492e2..099a274 100644 --- a/src/etrade_tax_poland/dividends.py +++ b/src/etrade_tax_poland/dividends.py @@ -2,7 +2,9 @@ import datetime -from . import common as etc +from . import files_handling as fh +from . import nbp +from .common import TAX_PL class Dividend: @@ -64,7 +66,7 @@ def insert_currencies_ratio(self, ratio_date, ratio_value): self.ratio_date = ratio_date self.ratio_value = ratio_value self.pln_gross = round(self.usd_gross * ratio_value, 2) - self.flat_rate_tax = round(self.pln_gross * etc.TAX_PL, 2) + self.flat_rate_tax = round(self.pln_gross * TAX_PL, 2) self.pln_tax_paid = round(self.usd_tax * ratio_value, 2) self.pln_tax_due = self.flat_rate_tax - self.pln_tax_paid @@ -139,22 +141,22 @@ def divs_sum_csved(dividends): def process_dividend_docs(directory): """Count due tax based on statements files in directory.""" - files = etc.pdfs_in_dir(directory) + files = fh.pdfs_in_dir(directory) dividends = [] for filename in files: - text = etc.file_to_text(f"{directory}/{filename}") + text = fh.file_to_text(f"{directory}/{filename}") if dividend := get_stock_dividend_from_text(text): dividend.file = filename - dividend.insert_currencies_ratio(*etc.date_to_usd_pln(dividend.pay_date)) + dividend.insert_currencies_ratio(*nbp.date_to_usd_pln(dividend.pay_date)) dividends.append(dividend) if ldivs := get_liquidity_dividends_from_text(text): for ldiv in ldivs: ldiv.file = filename - ldiv.insert_currencies_ratio(*etc.date_to_usd_pln(ldiv.pay_date)) + ldiv.insert_currencies_ratio(*nbp.date_to_usd_pln(ldiv.pay_date)) dividends += ldivs for dividend in dividends: - dividend.insert_currencies_ratio(*etc.date_to_usd_pln(dividend.pay_date)) + dividend.insert_currencies_ratio(*nbp.date_to_usd_pln(dividend.pay_date)) - etc.save_csv("_dividend.csv", Dividend.csv_header(), [d.csved() for d in dividends]) - etc.save_csv("dividends_summary.csv", etc.sum_header(), divs_sum_csved(dividends)) + fh.save_csv("_dividend.csv", Dividend.csv_header(), [d.csved() for d in dividends]) + fh.save_csv("dividends_summary.csv", fh.sum_header(), divs_sum_csved(dividends)) diff --git a/src/etrade_tax_poland/files_handling.py b/src/etrade_tax_poland/files_handling.py new file mode 100644 index 0000000..0e53b38 --- /dev/null +++ b/src/etrade_tax_poland/files_handling.py @@ -0,0 +1,51 @@ +"""Implement common functions for files processing.""" + +import glob +import os + +import pyexcel.cookbook as xlsck +from pypdf import PdfReader + + +def pdfs_in_dir(directory): + """Get all PDF statements files.""" + os.chdir(directory) + return glob.glob("*.pdf") + + +def file_to_text(filename): + """Parse PDF file to text only.""" + reader = PdfReader(filename) + text = "" + for page in reader.pages: + text += page.extract_text() + "\n" + return text + + +def save_csv(filename, header, lines): + """Save header and lines to a csv file.""" + if not lines: + return + with open(filename, "w", encoding="utf-8") as file: + file.write(f"{header}\n") + for line in lines: + file.write(f"{line}\n") + + +def merge_csvs(): + """Merge csvs into xlsx and remove them.""" + files_list = glob.glob("*.csv") + xlsck.merge_all_to_a_book(files_list, "etrade.xlsx") + for file_name in files_list: + os.remove(file_name) + + +def sum_header(): + """Return sum csv file header.""" + return ",".join( + [ + "NAME", + "VALUE", + "PIT_FIELD", + ] + ) diff --git a/src/etrade_tax_poland/nbp.py b/src/etrade_tax_poland/nbp.py new file mode 100644 index 0000000..8edc92e --- /dev/null +++ b/src/etrade_tax_poland/nbp.py @@ -0,0 +1,83 @@ +"""Implement NBP currencies ratios gathering.""" + +import datetime +import json +import os +from time import sleep + +import requests + + +class NbpRatiosCache: + """Cache data instead of always requesting from NBP.""" + + date_format = "%Y-%m-%d" + nbp_url = "https://api.nbp.pl/api/exchangerates/rates/a/usd/{}/?format=json" + + def __init__(self): + """Initialize objects and fields.""" + self.cache_file = f"{os.path.dirname(os.path.abspath(__file__))}/nbp_cache.json" + self.read_cache() + + def read_cache(self): + """Read cache file.""" + if not os.path.isfile(self.cache_file): + self.cache = {"_": ""} + return + with open(self.cache_file, "r", encoding="utf-8") as file: + self.cache = json.load(file) + + def write_cache(self): + """Write cache file.""" + json_dump_params = { + "sort_keys": True, + "indent": 2, + "separators": (",", ": "), + } + with open(self.cache_file, "w", encoding="utf-8") as file: + json.dump(self.cache, file, **json_dump_params) + file.write("\n") + + def get_ratio(self, date_obj): + """Get ratio from cache if available, otherwise request from NBP.""" + key = date_obj.strftime(self.date_format) + if key in self.cache: + if not self.cache[key]: + raise ValueError("Ratio for selected date is not available in NBP") + return self.cache[key] + + while True: + try: + current_url = self.nbp_url.format(date_obj.strftime(self.date_format)) + req = requests.get(current_url, timeout=5) + except (requests.exceptions.ProxyError, requests.exceptions.ConnectTimeout): + print("Timeout 5s when getting USD/PLN ratio, retrying after 1 second") + sleep(1) + continue + if req.status_code == 200: + self.cache[key] = req.json()["rates"][0]["mid"] + self.write_cache() + return self.cache[key] + if req.status_code == 404: + self.cache[key] = "" + self.write_cache() + raise ValueError("Ratio for selected date is not available in NBP") + print(f"{req.status_code} {req.text}") + print("Unhandled error when getting USD/PLN ratio, retrying after 1 second") + sleep(1) + continue + + +NBP_CACHE = NbpRatiosCache() + + +def date_to_usd_pln(date_obj): + """Find 'day before vestment' USD/PLN ratio.""" + while True: + date_obj -= datetime.timedelta(days=1) + try: + ratio = NBP_CACHE.get_ratio(date_obj) + break + except ValueError: + date_obj -= datetime.timedelta(days=1) + return (date_obj, ratio) diff --git a/src/etrade_tax_poland/stocks.py b/src/etrade_tax_poland/stocks.py index e3d9b5b..a5dcf97 100644 --- a/src/etrade_tax_poland/stocks.py +++ b/src/etrade_tax_poland/stocks.py @@ -2,7 +2,9 @@ from datetime import datetime -from . import common as etc +from . import files_handling as fh +from . import nbp +from .common import TAX_PL class Trade: @@ -259,7 +261,7 @@ def stocks_sum_csved(stocks): tax_deductible = sum(s.buy_tax_deductible for s in stocks) profit = other_income - tax_deductible rounded_profit = int(round(profit, 0)) - tax_base = rounded_profit * etc.TAX_PL + tax_base = rounded_profit * TAX_PL tax_rounded = int(round(tax_base, 0)) return [ f"other income,{other_income:.2f},PIT-38/C/22&24", @@ -273,14 +275,14 @@ def stocks_sum_csved(stocks): def process_stock_docs(directory): """Process all docs and find stocks data.""" - files = etc.pdfs_in_dir(directory) + files = fh.pdfs_in_dir(directory) espps = [] # Employee Stock Purchase Plan rests = [] # Restricted Stock trades = [] # stocks sell events for filename in files: full_path = f"{directory}/{filename}" - text = etc.file_to_text(full_path) + text = fh.file_to_text(full_path) if espp := espp_from_text(text): espp.file = full_path espps.append(espp) @@ -289,13 +291,13 @@ def process_stock_docs(directory): rests.append(rest) if trade := trade_from_text(text): trade.file = full_path - trade.insert_currencies_ratio(*etc.date_to_usd_pln(trade.trade_date)) + trade.insert_currencies_ratio(*nbp.date_to_usd_pln(trade.trade_date)) trades.append(trade) ses = [StockEvent(x) for x in espps + rests + trades] - etc.save_csv("_espp.csv", EsppStock.csv_header(), [e.csved() for e in espps]) - etc.save_csv("_rs.csv", RestrictedStock.csv_header(), [r.csved() for r in rests]) - etc.save_csv("_trade.csv", Trade.csv_header(), [t.csved() for t in trades]) - etc.save_csv("_stocks.csv", StockEvent.csv_header(), [s.csved() for s in ses]) - etc.save_csv("stocks_summary.csv", etc.sum_header(), stocks_sum_csved(ses)) + fh.save_csv("_espp.csv", EsppStock.csv_header(), [e.csved() for e in espps]) + fh.save_csv("_rs.csv", RestrictedStock.csv_header(), [r.csved() for r in rests]) + fh.save_csv("_trade.csv", Trade.csv_header(), [t.csved() for t in trades]) + fh.save_csv("_stocks.csv", StockEvent.csv_header(), [s.csved() for s in ses]) + fh.save_csv("stocks_summary.csv", fh.sum_header(), stocks_sum_csved(ses)) diff --git a/src/etrade_tax_poland/util_fill_nbp_cache.py b/src/etrade_tax_poland/util_fill_nbp_cache.py index ddd002d..1b580d6 100644 --- a/src/etrade_tax_poland/util_fill_nbp_cache.py +++ b/src/etrade_tax_poland/util_fill_nbp_cache.py @@ -2,7 +2,7 @@ import datetime -from .common import date_to_usd_pln +from .nbp import date_to_usd_pln date_obj = datetime.datetime.now() end = datetime.datetime.fromisoformat("2020-01-01")