Skip to content

Commit

Permalink
use income column in prepare persons
Browse files Browse the repository at this point in the history
  • Loading branch information
rakow committed Mar 26, 2024
1 parent 8aef0fd commit 5da9b72
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 7 deletions.
2 changes: 1 addition & 1 deletion matsim/scenariogen/data/formats/srv.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ def parse_zone(h, prefix=""):
ob = parse_int_str(getattr(h, prefix + "OBERBEZIRK"))

zone = pd.NA
if ob:
if ob and hasattr(h, prefix + "UNTERBEZIRK"):
zone = ob
ub = parse_int_str(getattr(h, prefix + "UNTERBEZIRK"))
if ub:
Expand Down
104 changes: 98 additions & 6 deletions matsim/scenariogen/data/preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@ def prepare_persons(hh, pp, tt, augment=5, max_hh_size=5, core_weekday=False, re
""" Cleans common data errors and fill missing values """
df = pp.join(hh, on="hh_id", lsuffix="hh_")

# Replace unknown income group
fill(df, "income", -1)

# Replace unknown economic status
df["economic_status"] = df.apply(
lambda x: income_to_economic_status(x.income, df[df.hh_id == x.hh_id])
if x.economic_status == EconomicStatus.UNKNOWN else x.economic_status, axis=1
)

# Augment data using p_weight
if augment > 1:
df = augment_persons(df, augment)
Expand All @@ -31,9 +40,6 @@ def prepare_persons(hh, pp, tt, augment=5, max_hh_size=5, core_weekday=False, re
df.loc[df.age < 6, "pt_abo_avail"] = Availability.NO
fill(df, "pt_abo_avail", Availability.UNKNOWN)

# Replace unknown income group
fill(df, "economic_status", EconomicStatus.UNKNOWN)

# Large households are underrepresented and capped
df.n_persons = np.minimum(df.n_persons, max_hh_size)

Expand Down Expand Up @@ -64,19 +70,22 @@ def prepare_persons(hh, pp, tt, augment=5, max_hh_size=5, core_weekday=False, re

return df


def bins_to_labels(bins):
    """ Convert bin edges to human-readable labels

    Each pair of neighbouring edges yields one label of the form "lower - upper",
    formatted without decimals. If the last edge is infinite, the last label
    is written as "lower+" instead.

    :param bins: ordered sequence of bin edges
    :return: list of labels, one per interval (empty if fewer than two edges are given)
    """
    # Fewer than two edges describe no interval; returning early also avoids
    # indexing bins[-1] / bins[-2] / res[-1] on sequences that are too short.
    if len(bins) < 2:
        return []

    res = ["%.0f - %.0f" % (lower, upper) for lower, upper in zip(bins[:-1], bins[1:])]

    # Open-ended last bin: show only the lower edge
    if bins[-1] == np.inf:
        res[-1] = "%.0f+" % bins[-2]

    return res


def cut(x, bins):
    """ Cut x into bins and return labels

    :param x: values to be binned, passed on to pd.cut
    :param bins: bin edges, also used to derive the labels
    :return: result of pd.cut, labelled via bins_to_labels
    """
    labels = bins_to_labels(bins)

    # right=False: the right edge of each bin is not included
    return pd.cut(x, bins, labels=labels, right=False)


def augment_persons(pp, factor=1, permute_age=0.5):
""" Augment persons using p weight
Expand All @@ -101,6 +110,88 @@ def augment_persons(pp, factor=1, permute_age=0.5):
return duplicated[check_age_employment(None, duplicated)]


def income_to_economic_status(income, persons):
    """ Convert income to economic status

    The household is weighted by its composition: 1 for the first person,
    0.3 for every person younger than 14 and 0.5 for every remaining person.
    The status is then looked up from the income bracket and this weight.

    :param income: income in Euro, negative or missing values mean unknown
    :param persons: persons table, needs an ``age`` column
    :return: member of EconomicStatus, UNKNOWN if the income is negative or missing
    """

    # Missing values must be checked first: NaN fails every "<" comparison below and
    # would end up in the highest income bracket, pd.NA would raise inside the "if".
    if pd.isna(income) or income < 0:
        return EconomicStatus.UNKNOWN

    # Calculated according to Srv 2018
    # https://tu-dresden.de/bu/verkehr/ivs/srv/ressourcen/dateien/SrV2018_Tabellenbericht_Oberzentren_500TEW-_flach.pdf?lang=de

    children = (persons.age < 14).sum()
    # Everyone who is neither a child nor the first person
    # NOTE(review): becomes negative if the table is empty or contains only children - confirm this cannot happen
    rest = len(persons) - children - 1

    w = 0.3 * children + 1 + 0.5 * rest

    if income < 1500:
        if w < 1.3:
            return EconomicStatus.LOW

        return EconomicStatus.VERY_LOW

    elif income < 2000:
        if w < 1.3:
            return EconomicStatus.MEDIUM
        elif w < 1.6:
            return EconomicStatus.LOW

        return EconomicStatus.VERY_LOW

    elif income < 2600:
        if w < 1.6:
            return EconomicStatus.MEDIUM
        elif w < 2.3:
            return EconomicStatus.LOW

        return EconomicStatus.VERY_LOW

    elif income < 3000:
        if w < 1.3:
            return EconomicStatus.HIGH
        elif w < 2.3:
            return EconomicStatus.MEDIUM
        elif w < 3.0:
            return EconomicStatus.LOW

        return EconomicStatus.VERY_LOW

    elif income < 3600:
        if w < 1.6:
            return EconomicStatus.HIGH
        elif w < 2.3:
            return EconomicStatus.MEDIUM
        elif w < 3.5:
            return EconomicStatus.LOW

        return EconomicStatus.VERY_LOW

    elif income < 4600:
        if w < 2.1:
            return EconomicStatus.HIGH
        elif w < 3.0:
            return EconomicStatus.MEDIUM

        return EconomicStatus.LOW

    elif income < 5600:
        if w < 1.3:
            return EconomicStatus.VERY_HIGH
        elif w < 2.8:
            return EconomicStatus.HIGH

        return EconomicStatus.MEDIUM

    # Highest income bracket (5600 and above)
    if w < 2.5:
        return EconomicStatus.VERY_HIGH

    return EconomicStatus.HIGH


def prepare_trips(pp, trips, core_weekday=True):
""" Create trip data frame """

Expand Down Expand Up @@ -286,7 +377,8 @@ def calc_commute(pp, tt):
edu.groupby("p_id").agg(commute_dist=("gis_length", "mean"), weight=("t_weight", "max"))


def calc_needed_short_distance_trips(ref_trips: pd.DataFrame, sim_trips: pd.DataFrame, max_dist=1000) -> Tuple[float, int]:
def calc_needed_short_distance_trips(ref_trips: pd.DataFrame, sim_trips: pd.DataFrame, max_dist=1000) -> Tuple[
float, int]:
""" Calculate number of short distance trips needed to add to match required share """

target_share = float(ref_trips[ref_trips.gis_length < (max_dist / 1000)].t_weight.sum() / ref_trips.t_weight.sum())
Expand All @@ -296,4 +388,4 @@ def calc_needed_short_distance_trips(ref_trips: pd.DataFrame, sim_trips: pd.Data
current_share = len(short_trips) / len(sim_trips)
num_trips = (len(short_trips) - len(sim_trips) * target_share) / (target_share - 1)

return target_share, num_trips
return target_share, num_trips

0 comments on commit 5da9b72

Please sign in to comment.