Skip to content

Commit

Permalink
Add params + better functionalities/files split
Browse files Browse the repository at this point in the history
  • Loading branch information
akantak committed Apr 24, 2024
1 parent bc3c9af commit ad677a0
Show file tree
Hide file tree
Showing 10 changed files with 186 additions and 167 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ repos:
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/psf/black
rev: 24.4.0
rev: 24.4.1
hooks:
- id: black
- repo: https://github.com/PyCQA/isort
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "etrade_tax_poland"
version = "0.0.8"
version = "0.0.9"
authors = [
{ name="Aleksander Kantak", email="[email protected]" },
]
Expand Down
22 changes: 7 additions & 15 deletions src/etrade_tax_poland/__main__.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,19 @@
"""Read all E*Trade files and parse."""

import os
import sys

from .common import merge_csvs
from .args import parse_args
from .dividends import process_dividend_docs
from .files_handling import merge_csvs
from .stocks import process_stock_docs


def parse_all_docs():
def parse_all_docs(dir_path):
"""Figure out directory and run all functions on it."""
dir_path = "."
if len(sys.argv) > 1:
dir_path = sys.argv[1]
if not os.path.isdir(dir_path):
print("Provided path is not a directory")
sys.exit(1)
dir_path = os.path.abspath(dir_path)
process_dividend_docs(dir_path)
process_stock_docs(dir_path)


if __name__ == "__main__":

parse_all_docs()
merge_csvs()
args = parse_args()
parse_all_docs(args.dirpath)
if not args.no_xlsx:
merge_csvs()
18 changes: 18 additions & 0 deletions src/etrade_tax_poland/args.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Parse CLI arguments."""

import argparse
import os
import sys


def parse_args(argv=None):
    """Parse CLI arguments.

    Args:
        argv: Optional list of argument strings. Defaults to None, in which
            case argparse reads ``sys.argv[1:]`` — preserving the original
            no-argument call behavior.

    Returns:
        argparse.Namespace with ``dirpath`` (absolute path of an existing
        directory, defaulting to the cwd) and ``no_xlsx`` (bool).

    Exits:
        Status 1 when ``dirpath`` does not point to a directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("dirpath", nargs="?", default=".", help="Get statements path")
    parser.add_argument(
        "-x", "--no-xlsx", action="store_true", help="do not merge CSV outputs into an XLSX file"
    )
    args = parser.parse_args(argv)
    if not os.path.isdir(args.dirpath):
        print("Provided path is not a directory")
        sys.exit(1)
    # Normalize to an absolute path so later os.chdir calls cannot break it.
    args.dirpath = os.path.abspath(args.dirpath)
    return args
131 changes: 1 addition & 130 deletions src/etrade_tax_poland/common.py
Original file line number Diff line number Diff line change
@@ -1,132 +1,3 @@
"""Implement common functions for etrade documents processing."""

import datetime
import glob
import json
import os
from time import sleep

import pyexcel.cookbook as xlsck
import requests
from pypdf import PdfReader
"""Common variables and functions."""

TAX_PL = 0.19


def pdfs_in_dir(directory):
"""Get all PDF statements files."""
os.chdir(directory)
return glob.glob("*.pdf")


def file_to_text(filename):
"""Parse PDF file to text only."""
reader = PdfReader(filename)
text = ""
for page in reader.pages:
text += page.extract_text() + "\n"
return text


def merge_csvs():
"""Merge csvs into xlsx and remove them."""
files_list = glob.glob("*.csv")
xlsck.merge_all_to_a_book(files_list, "etrade.xlsx")
for file_name in files_list:
os.remove(file_name)


class NbpRatiosCache:
"""Cache data instead of always requesting from NBP."""

date_format = "%Y-%m-%d"
nbp_url = "https://api.nbp.pl/api/exchangerates/rates/a/usd/{}/?format=json"

def __init__(self):
"""Initialize objects and fields."""
self.cache_file = f"{os.path.dirname(os.path.abspath(__file__))}/nbp_cache.json"
self.read_cache()

def read_cache(self):
"""Read cache file."""
if not os.path.isfile(self.cache_file):
self.cache = {"_": ""}
return
with open(self.cache_file, "r", encoding="utf-8") as file:
self.cache = json.load(file)

def write_cache(self):
"""Write cache file."""
json_dump_params = {
"sort_keys": True,
"indent": 2,
"separators": (",", ": "),
}
with open(self.cache_file, "w", encoding="utf-8") as file:
json.dump(self.cache, file, **json_dump_params)
file.write("\n")

def get_ratio(self, date_obj):
"""Get ratio from cache if available, otherwise request from NBP."""
key = date_obj.strftime(self.date_format)
if key in self.cache:
if not self.cache[key]:
raise ValueError("Ratio for selected date is not available in NBP")
return self.cache[key]

while True:
try:
current_url = self.nbp_url.format(date_obj.strftime(self.date_format))
req = requests.get(current_url, timeout=5)
except (requests.exceptions.ProxyError, requests.exceptions.ConnectTimeout):
print("Timeout 5s when getting USD/PLN ratio, retrying after 1 second")
sleep(1)
continue
if req.status_code == 200:
self.cache[key] = req.json()["rates"][0]["mid"]
self.write_cache()
return self.cache[key]
if req.status_code == 404:
self.cache[key] = ""
self.write_cache()
raise ValueError("Ratio for selected date is not available in NBP")
print(f"{req.status_code} {req.text}")
print("Unhandled error when getting USD/PLN ratio, retrying after 1 second")
sleep(1)
continue


NBP_CACHE = NbpRatiosCache()


def date_to_usd_pln(date_obj):
"""Find 'day before vestment' USD/PLN ratio."""
while True:
date_obj -= datetime.timedelta(days=1)
try:
ratio = NBP_CACHE.get_ratio(date_obj)
break
except ValueError:
date_obj -= datetime.timedelta(days=1)
return (date_obj, ratio)


def save_csv(filename, header, lines):
"""Save header and lines to a csv file."""
if not lines:
return
with open(filename, "w", encoding="utf-8") as file:
file.write(f"{header}\n")
for line in lines:
file.write(f"{line}\n")


def sum_header():
"""Return sum csv file header."""
return ",".join(
[
"NAME",
"VALUE",
"PIT_FIELD",
]
)
20 changes: 11 additions & 9 deletions src/etrade_tax_poland/dividends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

import datetime

from . import common as etc
from . import files_handling as fh
from . import nbp
from .common import TAX_PL


class Dividend:
Expand Down Expand Up @@ -64,7 +66,7 @@ def insert_currencies_ratio(self, ratio_date, ratio_value):
self.ratio_date = ratio_date
self.ratio_value = ratio_value
self.pln_gross = round(self.usd_gross * ratio_value, 2)
self.flat_rate_tax = round(self.pln_gross * etc.TAX_PL, 2)
self.flat_rate_tax = round(self.pln_gross * TAX_PL, 2)
self.pln_tax_paid = round(self.usd_tax * ratio_value, 2)
self.pln_tax_due = self.flat_rate_tax - self.pln_tax_paid

Expand Down Expand Up @@ -139,22 +141,22 @@ def divs_sum_csved(dividends):

def process_dividend_docs(directory):
"""Count due tax based on statements files in directory."""
files = etc.pdfs_in_dir(directory)
files = fh.pdfs_in_dir(directory)
dividends = []
for filename in files:
text = etc.file_to_text(f"{directory}/{filename}")
text = fh.file_to_text(f"{directory}/{filename}")
if dividend := get_stock_dividend_from_text(text):
dividend.file = filename
dividend.insert_currencies_ratio(*etc.date_to_usd_pln(dividend.pay_date))
dividend.insert_currencies_ratio(*nbp.date_to_usd_pln(dividend.pay_date))
dividends.append(dividend)
if ldivs := get_liquidity_dividends_from_text(text):
for ldiv in ldivs:
ldiv.file = filename
ldiv.insert_currencies_ratio(*etc.date_to_usd_pln(ldiv.pay_date))
ldiv.insert_currencies_ratio(*nbp.date_to_usd_pln(ldiv.pay_date))
dividends += ldivs

for dividend in dividends:
dividend.insert_currencies_ratio(*etc.date_to_usd_pln(dividend.pay_date))
dividend.insert_currencies_ratio(*nbp.date_to_usd_pln(dividend.pay_date))

etc.save_csv("_dividend.csv", Dividend.csv_header(), [d.csved() for d in dividends])
etc.save_csv("dividends_summary.csv", etc.sum_header(), divs_sum_csved(dividends))
fh.save_csv("_dividend.csv", Dividend.csv_header(), [d.csved() for d in dividends])
fh.save_csv("dividends_summary.csv", fh.sum_header(), divs_sum_csved(dividends))
51 changes: 51 additions & 0 deletions src/etrade_tax_poland/files_handling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""Implement common functions for files processing."""

import glob
import os

import pyexcel.cookbook as xlsck
from pypdf import PdfReader


def pdfs_in_dir(directory):
    """List the PDF statement files found directly inside *directory*.

    NOTE: this also switches the process working directory to *directory*;
    the later CSV/XLSX helpers rely on that side effect.
    """
    os.chdir(directory)
    pdf_pattern = "*.pdf"
    return glob.glob(pdf_pattern)


def file_to_text(filename):
    """Return the extracted text of every PDF page, each followed by a newline."""
    document = PdfReader(filename)
    page_texts = [page.extract_text() + "\n" for page in document.pages]
    return "".join(page_texts)


def save_csv(filename, header, lines):
    """Write *header* then each of *lines* to *filename*; no-op when *lines* is empty."""
    if not lines:
        return
    rows = [header, *lines]
    with open(filename, "w", encoding="utf-8") as out:
        out.write("".join(f"{row}\n" for row in rows))


def merge_csvs():
    """Combine every CSV in the current directory into etrade.xlsx, then delete the CSVs."""
    csv_files = glob.glob("*.csv")
    xlsck.merge_all_to_a_book(csv_files, "etrade.xlsx")
    for csv_file in csv_files:
        os.remove(csv_file)


def sum_header():
    """Return the header row shared by the summary CSV files."""
    columns = ("NAME", "VALUE", "PIT_FIELD")
    return ",".join(columns)
83 changes: 83 additions & 0 deletions src/etrade_tax_poland/nbp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""Implement NBP currencies ratios gathering."""

import datetime
import json
import os
from time import sleep

import requests


class NbpRatiosCache:
    """Cache NBP USD/PLN ratios on disk instead of always requesting them.

    The cache is a JSON file stored next to this module. Keys are ISO
    (``YYYY-MM-DD``) dates; values are the published mid ratio, or ``""``
    when NBP has no ratio for that date (e.g. weekends and holidays).
    """

    date_format = "%Y-%m-%d"
    nbp_url = "https://api.nbp.pl/api/exchangerates/rates/a/usd/{}/?format=json"

    def __init__(self):
        """Initialize objects and fields."""
        self.cache_file = f"{os.path.dirname(os.path.abspath(__file__))}/nbp_cache.json"
        self.read_cache()

    def read_cache(self):
        """Load the cache file into memory; start fresh when it is missing."""
        if not os.path.isfile(self.cache_file):
            # Seed with a dummy entry so a brand-new cache is never empty.
            self.cache = {"_": ""}
            return
        with open(self.cache_file, "r", encoding="utf-8") as file:
            self.cache = json.load(file)

    def write_cache(self):
        """Persist the in-memory cache as sorted, pretty-printed JSON."""
        json_dump_params = {
            "sort_keys": True,
            "indent": 2,
            "separators": (",", ": "),
        }
        with open(self.cache_file, "w", encoding="utf-8") as file:
            json.dump(self.cache, file, **json_dump_params)
            file.write("\n")

    def get_ratio(self, date_obj):
        """Get ratio from cache if available, otherwise request from NBP.

        Raises:
            ValueError: when NBP publishes no ratio for the given date.
        """
        key = date_obj.strftime(self.date_format)
        if key in self.cache:
            if not self.cache[key]:
                raise ValueError("Ratio for selected date is not available in NBP")
            return self.cache[key]

        while True:
            try:
                current_url = self.nbp_url.format(date_obj.strftime(self.date_format))
                req = requests.get(current_url, timeout=5)
            except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
                # Fix: requests.get(timeout=5) can raise ReadTimeout, which the
                # previous (ProxyError, ConnectTimeout) pair did not catch and
                # crashed the run instead of retrying. ConnectionError covers
                # ProxyError; Timeout covers both ConnectTimeout and ReadTimeout.
                print("Timeout 5s when getting USD/PLN ratio, retrying after 1 second")
                sleep(1)
                continue
            if req.status_code == 200:
                self.cache[key] = req.json()["rates"][0]["mid"]
                self.write_cache()
                return self.cache[key]
            if req.status_code == 404:
                # NBP has no ratio for this date (weekend/holiday): remember
                # the negative result so we never re-query it.
                self.cache[key] = ""
                self.write_cache()
                raise ValueError("Ratio for selected date is not available in NBP")
            print(f"{req.status_code} {req.text}")
            print("Unhandled error when getting USD/PLN ratio, retrying after 1 second")
            sleep(1)
            continue


# Module-level singleton shared by all callers; instantiating it reads (or
# seeds) the JSON cache file at import time.
NBP_CACHE = NbpRatiosCache()


def date_to_usd_pln(date_obj):
    """Find the most recent USD/PLN ratio published strictly before *date_obj*.

    Steps backwards one day at a time until NBP has a ratio (skipping
    weekends/holidays, which raise ValueError from the cache).

    Returns:
        Tuple ``(ratio_date, ratio)`` — the date actually used and its ratio.
    """
    while True:
        date_obj -= datetime.timedelta(days=1)
        try:
            ratio = NBP_CACHE.get_ratio(date_obj)
            break
        except ValueError:
            # Fix: the loop header already steps back one day, so decrementing
            # again here (as the original did) skipped every other candidate
            # day after a date with no published ratio.
            continue
    return (date_obj, ratio)
Loading

0 comments on commit ad677a0

Please sign in to comment.