Skip to content

Commit

Permalink
Fix lint
Browse files Browse the repository at this point in the history
  • Loading branch information
rexruan committed Mar 18, 2024
1 parent ccf4c27 commit 3969ff3
Show file tree
Hide file tree
Showing 12 changed files with 110 additions and 113 deletions.
44 changes: 21 additions & 23 deletions server/lib/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pandas as pd
from sqlalchemy.orm import Session

from server.lib.exceptions import DownloadError
from server.lib.fetch_features import get_features_for_items
from server.lib.utils import get_texts
from server.models import Text, Paragraph, Sentence
Expand All @@ -18,11 +19,6 @@
S = Union[int, float]


class DownloadError(Exception):
"""Download Error"""



async def download_texts(data: Dict[str, Any], db: Session) -> FileResponse:
language = data["lang"]
download_format = data["outputForm"]
Expand All @@ -39,7 +35,7 @@ async def download_statistics(data: Dict[str, Any], db: Session) -> FileResponse
language = data["lang"]
download_format = data["outputForm"]
features = get_chosen_aspects_and_features(selected_indeces=data["chosenFeatures"], references=data["featureList"])
texts = get_texts(db=db, language=language)
texts = [text for text in get_texts(db=db, language=language) if text.parsed]
with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", delete=False) as tmp:
tmp_name = tmp.name
Writer(
Expand Down Expand Up @@ -68,9 +64,10 @@ def _get_writer(self) -> IO[Any]:
if self.format == ".csv":
return csv.writer(self.file, delimiter=COLUMN_DELIMITER, escapechar="\\", quoting=csv.QUOTE_NONE)
if self.format == ".xlsx":
return pd.ExcelWriter(path=self.file.name, engine="openpyxl")
return pd.ExcelWriter(path=self.file.name, engine="openpyxl") # pylint: disable=abstract-class-instantiated

self._raise_format_error()
return None

def _write(self) -> Callable:
if self.format == ".txt":
Expand All @@ -81,10 +78,11 @@ def _write(self) -> Callable:
return self._write2xlsx

self._raise_format_error()
return None

def _write2xlsx(
self, arrays: List[List[str]], sheet_name: str,
indeces: Optional[List[str]] = None, columns: Optional[List[str]] = None
indeces: Optional[List[str]] = None, columns: Optional[List[str]] = None
) -> None:

if columns:
Expand All @@ -94,13 +92,12 @@ def _write2xlsx(
else:
df = pd.DataFrame(arrays, columns=columns)
df.to_excel(self.writer, index=False, columns=columns, sheet_name=sheet_name)
elif indeces:
df = pd.DataFrame(arrays, index=indeces)
df.to_excel(self.writer, header=False, sheet_name=sheet_name)
else:
if indeces:
df = pd.DataFrame(arrays, index=indeces)
df.to_excel(self.writer, header=False, sheet_name=sheet_name)
else:
df = pd.DataFrame(arrays)
df.to_excel(self.writer, header=False, index=False, sheet_name=sheet_name)
df = pd.DataFrame(arrays)
df.to_excel(self.writer, header=False, index=False, sheet_name=sheet_name)

def download_texts(self) -> None:
self._write_file_header()
Expand Down Expand Up @@ -144,7 +141,7 @@ def download_statistics_block_all(self):
self.download_statistics_aspect_body(aspect, sheet_name=f"overview-{level}-{aspect['aspect']}")

def download_statistics_block_ones(self):
self.aspects = [aspect for aspect in self.features]
self.aspects = list(self.features)
if self.format != ".xlsx":
self.write(self._txt_format("Detail"))
self.write("")
Expand Down Expand Up @@ -215,11 +212,11 @@ def _download_features_for_aspect(
for feature_name, feature_item in data.items():
feature_item.update({"name": feature_name})
self._write_feature(feature_item)
else:
return [
self._write_feature({"name": feature_name, **feature_item})
for feature_name, feature_item in data.items()
]
return None
return [
self._write_feature({"name": feature_name, **feature_item})
for feature_name, feature_item in data.items()
]

def _get_feature_string(
self, name: str, scalar: Union[int, float], mean: Union[int, float], median: Union[int, float]
Expand All @@ -230,16 +227,17 @@ def _txt_format(self, string: str) -> str:
return f"{string:^88}".replace(" ", "-")

def _write_feature(self, feature_item: Dict[str, Any]) -> Optional[Tuple[str, S, S, S]]:
name, scalar, mean, median = [feature_item.get(attr, "") for attr in ["name", "scalar", "mean", "median"]]
name, scalar, mean, median = (feature_item.get(attr, "") for attr in ("name", "scalar", "mean", "median"))
if self.format == ".xlsx":
return name, scalar, mean, median
self.write(self._get_feature_string(name, scalar, mean, median))
return None

def _write_file_header(self, is_statistic_file: bool = False) -> None:
file_headers = ["# Swegram", f"# Time: {_get_now()}", f"# Language: {self.language}"]

if is_statistic_file:
file_headers = [header_line.lstrip("# ") for header_line in file_headers ]
file_headers = [header_line.lstrip("# ") for header_line in file_headers]
file_headers.extend([
" AND ".join(self.blocks),
f"Texts: {', '.join([t.filename for t in self.texts])}",
Expand Down Expand Up @@ -301,7 +299,7 @@ def _raise_format_error(self):
raise DownloadError(f"Unknown format to download: {self.format}")

def _c(self, level: str) -> str:
return {"para": "paragraph", "sent": "sentence"}.get(level, level)
return {"para": "paragraph", "sent": "sentence"}.get(level, level)


def _get_index_and_data(lines: List[str]) -> Tuple[List[str], List[List[str]]]:
Expand Down
9 changes: 9 additions & 0 deletions server/lib/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"""exception module"""


class ServerError(Exception):
"""server error"""


class DownloadError(Exception):
"""Download Error"""
4 changes: 2 additions & 2 deletions server/lib/fetch_current_sentences.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def fetch_current_sentences(text_id: int, page: int, db: Session) -> Dict[str, A
text = db.query(Text).get(ident=text_id)
sentences = db.query(Sentence) \
.filter(Sentence.uuid == text.uuid) \
.order_by(Sentence.id)[(int(page)-1) * PAGE_SIZE:int(page) * PAGE_SIZE]
.order_by(Sentence.id)[(int(page) - 1) * PAGE_SIZE: int(page) * PAGE_SIZE]

return JSONResponse({
"current_sentences": [{"tokens": sentence.serialize_tokens()} for sentence in sentences],
Expand All @@ -25,4 +25,4 @@ def fetch_current_sentences(text_id: int, page: int, db: Session) -> Dict[str, A
"page_size": PAGE_SIZE
})
except Exception as err:
raise HTTPException(status_code=500, detail=str(err))
raise HTTPException(status_code=500, detail=str(err)) from err
21 changes: 7 additions & 14 deletions server/lib/fetch_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def _fetch_text_ids_and_filenames(texts: List[Text]) -> List[Tuple[int, str]]:
return [(t.id, t.filename) for t in texts]


def _fetch_selected_text_ids(texts: List[Text]) -> List[int]:
def _fetch_selected_text_ids(texts: List[Text]) -> List[int]:
"""Fetch the selected text ids from texts"""
return [t.id for t in texts if t.activated]

Expand All @@ -28,21 +28,14 @@ def _update_metadata(metadata: Dict[str, Any], texts: List[Text]) -> Dict[str, A
texts_with_metadata.add(text_id)
else:
del value_dict[key][i]
values = [
{
'label':key,
'value': value + index,
'children':[
{
'value': v,
'label': l
} for v,l in value_dict[key]
]
} for index, key in enumerate(value_dict.keys(), 1) if has_values[index-1]]
values = [{
"label": key, "value": value + index,
"children": [{"value": v, "label": l} for v, l in value_dict[key]]
} for index, key in enumerate(value_dict.keys(), 1) if has_values[index - 1]]
if values:
options.append({'value':value, 'label':label, 'children': values})
options.append({"value": value, "label": label, "children": values})
value += len(value_dict) + 1

return {
"options": options,
"texts_with_metadata": list(texts_with_metadata)
Expand Down
41 changes: 18 additions & 23 deletions server/lib/fetch_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
from sqlalchemy.orm import Session

from server.models import Text, Paragraph, Sentence
from server.lib.exceptions import ServerError
from server.lib.utils import get_texts
from swegram_main.config import SUC_TAGS, PT_TAGS, PAGE_SIZE
from swegram_main.config import PAGE_SIZE
from swegram_main.lib.utils import mean, median, r2


ASPECT_LIST = ["general", "readability", "morph", "lexical", "syntactic"]
ASPECTS = ("general", "readability", "morph", "lexical", "syntactic")


class Annotation(BaseModel):
Expand Down Expand Up @@ -41,7 +42,7 @@ class State(BaseModel):
def post_states(data: Dict[str, Any], db: Session) -> Dict[str, Any]:
"""post states"""
language = data["lang"]
texts = get_texts(db, language)
texts = get_texts(db, language)
normalized, parsed, tokenized = [Annotation() for _ in range(3)]
_texts, paragraphs, sentences = 0, 0, 0
for text in texts:
Expand Down Expand Up @@ -72,7 +73,7 @@ def get_features(element: str, index: int, data: Dict[str, Any], db: Session) ->
if texts:
if element == "text":
content = texts[start_index:start_index + size]
elif element in ["sent", "para"]:
elif element in set({"sent", "para"}):
content = []
for text in texts:
number = text.sents if element == "sent" else len(text.paragraphs)
Expand All @@ -98,7 +99,7 @@ def get_features(element: str, index: int, data: Dict[str, Any], db: Session) ->
{
"name": k, **v
} for k, v in _content.as_dict()[aspect].items()]}
} for aspect in ASPECT_LIST if _content.as_dict().get(aspect)
} for aspect in ASPECTS if _content.as_dict().get(aspect)
]
})

Expand All @@ -109,27 +110,22 @@ def get_features(element: str, index: int, data: Dict[str, Any], db: Session) ->
}



def get_features_for_items(
level: str, texts: List[Text], features: Optional[Dict[str, List[str]]], aspects: Optional[List[str]] = None
) -> List[Dict[str, Any]]:
"""Fetch statistic based on aspect list and feature list.
:param features: Chosen features to be included, if features exist, it will overwrite aspects
:type features: Optional[Dict[str, List[str]]]
"""Fetch statistic based on aspect list and feature list
"""
# breakpoint()
if level in ["texts", "text"]:
if level in set({"texts", "text"}):
items = texts
elif level in ["paras", "paragraph"]:
elif level in set({"paras", "paragraph"}):
items: List[Paragraph] = [p for t in texts for p in t.paragraphs]
elif level in ["sents", "sentence"]:
elif level in set({"sents", "sentence"}):
items: List[Sentence] = [s for t in texts for p in t.paragraphs for s in p.sentences]
else:
raise
raise ServerError(f"Unknown level: {level}")

if features:
aspects = [aspect for aspect in features]
aspects = list(features)

aspect_data = []
for aspect in aspects:
Expand All @@ -140,7 +136,7 @@ def get_features_for_items(
if features and feature_name not in features.get(aspect, {}):
continue
if feature_name in _aspect_dict:
for metric in ["mean", "median", "scalar"]:
for metric in set({"mean", "median", "scalar"}):
_aspect_dict[feature_name][metric].append(feature_data.get(metric))
else:
_aspect_dict[feature_name] = {
Expand All @@ -149,11 +145,9 @@ def get_features_for_items(
"scalar": [feature_data.get("scalar")]
}
for feature_name, feature_data in _aspect_dict.items():

_aspect_dict[feature_name]["mean"] = mean([value for value in feature_data["mean"] if value != ""])
_aspect_dict[feature_name]["median"] = median([value for value in feature_data["median"] if value != ""])
_aspect_dict[feature_name]["scalar"] = r2(sum([value for value in feature_data["scalar"] if value]))

feature_data["mean"] = mean(value for value in feature_data["mean"] if value != "")
feature_data["median"] = median(value for value in feature_data["median"] if value != "")
feature_data["scalar"] = r2(sum(value for value in feature_data["scalar"] if value))

aspect_data.append({
"aspect": aspect,
Expand All @@ -162,9 +156,10 @@ def get_features_for_items(

return aspect_data


def get_overview_features_for_level(
level: str, data: Dict[str, Any], db: Session,
aspects: List[str] = ASPECT_LIST, features: Optional[Dict[str, List[str]]] = None
aspects: List[str] = ASPECTS, features: Optional[Dict[str, List[str]]] = None
) -> Dict[str, Any]:
language = data["lang"]
texts = get_texts(db, language)
Expand Down
6 changes: 3 additions & 3 deletions server/lib/fetch_frequencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@

def fetch_frequencies(category: str, tagset: str, data: Dict[str, Any], db: Session) -> Dict[str, Any]:
    """Collect frequency statistics over the parsed texts for one language.

    :param category: token attribute the counts are keyed by (forwarded to
        ``get_texts`` and ``get_type_and_pos_dicts``)
    :param tagset: part-of-speech tagset used when building the pos counts
    :param data: request payload; only ``data["lang"]`` is read here
    :param db: active database session
    :return: dict with the per-type counts (``f"{category}_pos"``), the
        sorted pos frequency list and the number of contributing texts
    """
    language = data["lang"]
    # Only parsed texts carry annotation, so unparsed ones are filtered out.
    texts = [text for text in get_texts(db, language, category=category) if text.parsed]
    type_dict, pos_dict = get_type_and_pos_dicts(category=category, tagset=tagset, texts=texts)

    return {
        f"{category}_pos": [
            {
                # NOTE(review): keys look like "<type>_<pos>"; pos is taken
                # from the left split while the type uses rsplit — these
                # disagree if a type itself contains "_". Confirm key format.
                "count": count,
                "pos": key.split("_", maxsplit=1)[-1],
                category: key.rsplit("_", maxsplit=1)[0],
            }
            # sorted() accepts the items view directly; no list() copy needed
            for key, count in sorted(type_dict.items(), key=lambda item: item[1], reverse=True)
        ],
        "pos_list": sorted(pos_dict.items(), key=lambda item: item[1], reverse=True),
        "number_of_texts": len(texts),
    }
10 changes: 5 additions & 5 deletions server/lib/fetch_lengths.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@
from swegram_main.config import PT_TAGS, SUC_TAGS


PUNCT_TAGS = [*SUC_TAGS[-3:], *PT_TAGS[-10:], "PUNCT"]
PUNCT_TAGS = [*SUC_TAGS[-3:], *PT_TAGS[-10:], "PUNCT"]


def fetch_lengths(category: str, tagset: str, data: Dict[str, Any], db: Session) -> Dict[str, Any]:
language = data["lang"]
texts = get_texts(db, language, category=category)
texts = [text for text in get_texts(db, language, category=category) if text.parsed]
type_dict, pos_dict = get_type_and_pos_dicts(category=category, tagset=tagset, texts=texts)

sorted_pos_list = [pos for pos, _ in sorted(pos_dict.items(), key=lambda x:x[1], reverse=True)]
sorted_pos_list = [pos for pos, _ in sorted(pos_dict.items(), key=lambda x: x[1], reverse=True)]
length_dict = {} # {1: {PP: {word: count}}}

for type_pos, count in type_dict.items():
Expand Down Expand Up @@ -54,12 +54,12 @@ def fetch_lengths(category: str, tagset: str, data: Dict[str, Any], db: Session)
"pos_list": [
{
"label": e, "prop": e
} for e in ["Length", *sorted_pos_list, "Total"]
} for e in ("Length", *sorted_pos_list, "Total")
],
"length_list": [{
**length,
"Total": {
"total": sum([data_dict["count"] for data_dict in length["Length"]["data"]]),
"total": sum(data_dict["count"] for data_dict in length["Length"]["data"]),
"data": []
}
} for length in length_list]
Expand Down
Loading

0 comments on commit 3969ff3

Please sign in to comment.