Skip to content

Commit

Permalink
Updated load_responders to work with load_responses
Browse files Browse the repository at this point in the history
- Also changed load_responders to check if responder is a duplicate before saving
-
  • Loading branch information
JusticeV452 committed May 14, 2024
1 parent 9c23d67 commit 28e50bd
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 128 deletions.
162 changes: 74 additions & 88 deletions backend/app/management/commands/load_responders.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,36 @@
"""

import os
import pandas
import pandas as pd

from tqdm import tqdm

from django.conf import settings
from django.core.management.base import BaseCommand
from app.models import LoknitiResponders, LoknitiCodebook

# pylint: disable=duplicate-code
from .load_responses import load_responses

# Maps each LoknitiResponders attribute name (key) to the LoknitiCodebook
# question_text (value) used to find the dataset column holding that attribute
# for a given election year.
ATTR_TO_QUESTION = {
"state_name": "State Name",
"PC_id": "P.C. ID",
"AC_id": "A.C. ID",
"PS_id": "P.S. ID",
"respondent_no": "Respondent Number",
"age": "What is your age?",
"gender": "What is your gender?",
"caste": "What is your caste?",
"religion": "What is your religion?",
"income": "What is your total monthly household income?",
"education_level": "Up to what level have you studied?",
"occupation": "What is your main occupation?"
}

# Attribute names whose combined values are used to look up an existing
# LoknitiResponders row, so that a duplicate responder is not saved again.
DISTINGUISHING_ATTRS = [
"state_name",
"PC_id",
"AC_id",
"PS_id",
"respondent_no"
]


class Command(BaseCommand):
Expand All @@ -23,99 +46,62 @@ class Command(BaseCommand):

def add_arguments(self, parser):
parser.add_argument(
"dataset_name",
"dataset_path",
type=str,
action="store",
help="Name of dataset in app/data folder (with extension)",
help="Path to dataset",
)
parser.add_argument(
"--hide_progress",
action="store_true",
help="Hide import progress bar"
)

def handle(self, *args, **options):
file_name = options.get("dataset_name")
file_path = os.path.join(settings.DATASET_DIR, file_name)
df = pandas.read_csv(file_path)

year = file_name[file_name.index("2"):file_name.index("2")+4]

cols = {}

question_column_mappings = [
("state", "State Name"),
("pc", "P.C. ID"),
("ac", "A.C. ID"),
("ps", "P.S. ID"),
("resno", "Respondent Number"),
("age", "What is your age?"),
("gender", "What is your gender?"),
("caste", "What is your caste?"),
("religion", "What is your religion?"),
("income", "What is your total monthly household income?"),
("education", "Up to what level have you studied?"),
("occupation", "What is your main occupation?")
]
file_path = options.get("dataset_path")
hide_progress = options.get("hide_progress")

df = pd.read_csv(file_path)
_, file_name = os.path.split(file_path)
year = file_name.split('_')[1]

attr_to_column = {}
if not LoknitiCodebook.objects.all():
raise Exception("""LoknitiCodebook needs to be populated first by running:
'python manage.py update_db --config_names lokniticodebook'""")

# Retrieve LoknitiCodebook question variables for each question text
for column_name, question_text in question_column_mappings:
for attr_name, question_text in ATTR_TO_QUESTION.items():
try:
cols[column_name] = LoknitiCodebook.objects.get(
attr_to_column[attr_name] = LoknitiCodebook.objects.get(
election_year=year, question_text=question_text
).question_var
except LoknitiCodebook.DoesNotExist:
print(column_name, None)

for (state_name, constituency_no, assembly_no, ps_no, respondent_no, age,
gender, caste, religion, income, education_level, occupation) in zip(
# column names based on file/election year
df[cols.get("state", "")] if cols.get(
"state", "") else [None]*len(df),
df[cols.get("pc", "")] if cols.get("pc", "") else [None]*len(df),
df[cols.get("ac", "")] if cols.get("ac", "") else [None]*len(df),
df[cols.get("ps", "")] if cols.get("ps", "") else [None]*len(df),
df[cols.get("resno", "")] if cols.get(
"resno", "") else [0]*len(df),
df[cols.get("age", "")] if cols.get("age", "") else [None]*len(df),
df[cols.get("gender", "")] if cols.get(
"gender", "") else [None]*len(df),
df[cols.get("caste", "")] if cols.get(
"caste", "") else [None]*len(df),
df[cols.get("religion", "")] if cols.get(
"religion", "") else [None]*len(df),
df[cols.get("income", "")] if cols.get(
"income", "") else [None]*len(df),
df[cols.get("education", "")
] if cols.get("education", "") else [None]*len(df),
df[cols.get("occupation", "")] if cols.get(
"occupation", "") else [None]*len(df),
):

try:
age = int(float(age))
except ValueError:
age = None

try:
respondent_no = (int(respondent_no))
except ValueError:
respondent_no = None

try:
ps_no = (int(ps_no))
except ValueError:
ps_no = None

responders = LoknitiResponders(
election_year=year,
state_name=state_name.split(": ", 1)[1],
PC_id=constituency_no,
AC_id=assembly_no,
PS_id=ps_no,
respondent_no=respondent_no,
age=age,
gender=gender,
caste=caste,
religion=religion,
income=income,
education_level=education_level,
occupation=occupation
attr_to_column[attr_name] = None
print(attr_name, "not recorded")

def get_column_value(row, column_name, attr_name):
value = getattr(row, column_name, None)
if pd.isnull(value):
value = None
if value is not None and attr_name in ["PS_id", "respondent_no", "age"]:
value = int(float(value))
return value

for row in tqdm(df.itertuples(), total=len(df), disable=hide_progress):
attr_dict = {
attr_name: get_column_value(row, column_name, attr_name)
for attr_name, column_name in attr_to_column.items()
if column_name is not None
}

# Responder already exists
if LoknitiResponders.objects.filter(**{
attr: attr_dict[attr] for attr in DISTINGUISHING_ATTRS}):
continue

responder = LoknitiResponders(
election_year=year, **attr_dict
)
# load_responses(responders, row, file_path)
responders.save()
responder.save()
load_responses(responder, row)
38 changes: 38 additions & 0 deletions backend/app/management/commands/load_responses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""
Helper for the load_responders Django management command
Updates the local db with response values from the base csv dataset
"""

from app.models import LoknitiResponses, LoknitiCodebook
from django.db.models import Q


def get_question_vars(year):
    """
    Build a lookup of survey question variables for one election year.

    Selects the LoknitiCodebook entries for `year` whose question_var starts
    with 'q' and returns them as a dict keyed by question_var.
    """
    year_matches = Q(election_year=year)
    is_survey_question = Q(question_var__startswith='q')

    lookup = {}
    for entry in LoknitiCodebook.objects.filter(year_matches & is_survey_question):
        lookup[entry.question_var] = entry
    return lookup


def load_responses(responder, row, question_vars=None):
    """
    Save one LoknitiResponses record per survey question for a Responder.

    Args:
        responder: Responder instance the responses belong to; its
            respondent_no and election_year are copied onto every response.
        row: Lokniti NES csv row that exposes each question variable as an
            attribute of the same name.
        question_vars: optional dict mapping question variable -> codebook
            entry (the shape returned by get_question_vars). When omitted it
            is looked up from the codebook for responder.election_year.
            Callers that load many rows for the same year can build it once
            and pass it in to avoid one codebook query per row.

    Raises:
        AttributeError: if `row` has no attribute for one of the question
            variables.
    """

    # Only hit the codebook when the caller did not supply the mapping.
    if question_vars is None:
        question_vars = get_question_vars(responder.election_year)

    for q_var, codebook_entry in question_vars.items():
        response = LoknitiResponses(
            respondent_no=responder.respondent_no,
            election_year=responder.election_year,
            question_var=q_var,
            # No default on purpose: a missing column surfaces as an error
            # instead of silently storing an empty response.
            response=getattr(row, q_var),
            responder=responder,
            entry=codebook_entry
        )
        response.save()
40 changes: 0 additions & 40 deletions backend/app/management/load_responses.py

This file was deleted.

0 comments on commit 28e50bd

Please sign in to comment.