Skip to content
12 changes: 8 additions & 4 deletions create_trace_mapping.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""Create trace mapping."""

from pathlib import Path

import yaml
from nemosis import static_table

Expand All @@ -20,14 +24,14 @@
# Draft the solar generator -> trace mapping and write it out as YAML.
solar_generator_mapping = draft_solar_generator_to_trace_mapping(
    solar_gens, solar_traces
)
# Path.open is an instance method: construct the Path first, then open it.
# Calling Path.open("name", "w") on the class passes a plain str as `self`.
with Path("draft_solar_generator_mapping.yaml").open("w", encoding="utf-8") as file:
    yaml.dump(solar_generator_mapping, file, default_flow_style=False)


# Draft the solar REZ -> trace mapping and write it out as YAML.
solar_traces = "/media/nick/Samsung_T5/isp_2024_data/trace_data/solar/solar_2023"
rezs = gets_rezs(workbook)
solar_rez_mapping = draft_solar_rez_mapping(rezs, solar_traces)
with Path("solar_area_mapping.yaml").open("w", encoding="utf-8") as file:
    yaml.dump(solar_rez_mapping, file, default_flow_style=False)

duids_and_station_names = static_table(
Expand All @@ -48,12 +52,12 @@
# Draft the wind generator -> trace mapping and write it out as YAML.
wind_generator_mapping = draft_wind_generator_to_trace_mapping(
    wind_gens, wind_duids_and_station_names, wind_traces
)
# Path.open is an instance method: construct the Path first, then open it.
# Calling Path.open("name", "w") on the class passes a plain str as `self`.
with Path("draft_wind_generator_mapping.yaml").open("w", encoding="utf-8") as file:
    # sort_keys=False keeps the per-generator fields in the order they were built.
    yaml.dump(wind_generator_mapping, file, default_flow_style=False, sort_keys=False)


# Draft the wind REZ -> trace mapping and write it out as YAML.
wind_traces = "D:/isp_2024_data/trace_data/wind/wind_2023"
rezs = gets_rezs(workbook)
wind_rez_mapping = draft_wind_rez_mapping(rezs, wind_traces)
with Path("draft_wind_rez_mapping.yaml").open("w", encoding="utf-8") as file:
    yaml.dump(wind_rez_mapping, file, default_flow_style=False)
35 changes: 13 additions & 22 deletions generator_to_trace_draft_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,25 +22,23 @@ def get_all_generators(workbook_filepath):
additional_gens["Status"] = "additional"

existing_gens = existing_gens.rename(
columns={existing_gens.columns.values[0]: "Generator"}
columns={existing_gens.columns.to_numpy[0]: "Generator"}

@dylanjmcconnell dylanjmcconnell Apr 20, 2026

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

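FYI: `.to_numpy` is a method, so this should be `.to_numpy()[0]`. As written, `.to_numpy[0]` subscripts the bound method instead of the array it returns. (Rewriting `.values` to `.to_numpy` is a known unsafe auto-fix.)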

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whoops!

)
# Rename the first column of each table to "Generator" so the tables can be
# concatenated on a common column name.
# NOTE: ``Index.to_numpy`` is a method and must be *called* before indexing;
# ``.to_numpy[0]`` would try to subscript the bound method and raise TypeError.
committed_gens = committed_gens.rename(
    columns={committed_gens.columns.to_numpy()[0]: "Generator"}
)
anticipated_gens = anticipated_gens.rename(
    columns={anticipated_gens.columns.to_numpy()[0]: "Generator"}
)
additional_gens = additional_gens.rename(
    columns={additional_gens.columns.to_numpy()[0]: "Generator"}
)

all_gens = pd.concat(
[existing_gens, committed_gens, anticipated_gens, additional_gens]
)

all_gens = all_gens.loc[:, ["Generator", "Technology type"]]

return all_gens
return all_gens.loc[:, ["Generator", "Technology type"]]


def gets_rezs(workbook_filepath):
Expand All @@ -53,15 +51,12 @@ def gets_rezs(workbook_filepath):
)
workbook = Parser(workbook_filepath)
rezs = workbook.get_table_from_config(table_config)
rezs = rezs.loc[:, ["Name"]]
return rezs
return rezs.loc[:, ["Name"]]


def find_best_match(plant_name, csv_files):
    """Return the candidate in ``csv_files`` that best matches ``plant_name``.

    Candidates are scored with ``fuzz.token_set_ratio`` via
    ``process.extractOne``.

    Args:
        plant_name: The name to look up.
        csv_files: The candidate names to match against.

    Returns:
        The first element of the ``extractOne`` result (the matched
        candidate), or ``None`` when ``extractOne`` returns nothing.
    """
    match = process.extractOne(plant_name, csv_files, scorer=fuzz.token_set_ratio)
    if not match:
        return None
    # extractOne's result is indexable; element 0 is the matched candidate.
    return match[0]


def find_best_match_two_columns(row, csv_files):
Expand Down Expand Up @@ -91,17 +86,15 @@ def draft_solar_generator_to_trace_mapping(solar_generators, solar_trace_directo
solar_generators["CSVFile"] = solar_generators["Generator"].apply(
lambda x: find_best_match(x, csv_project_names)
)
solar_generators = solar_generators.set_index("Generator")["CSVFile"].to_dict()
return solar_generators
return solar_generators.set_index("Generator")["CSVFile"].to_dict()


def draft_solar_rez_mapping(rezs, rezs_trace_directory):
    """Draft a mapping from REZ names to the best-matching solar area trace name.

    Lists the ``.csv`` files in ``rezs_trace_directory``, extracts metadata
    from each filename, keeps the names of files whose ``file_type`` is
    ``"area"``, and matches every entry of ``rezs["Name"]`` against them with
    ``find_best_match``.

    The mapping is built directly rather than by adding a ``CSVFile`` column
    to ``rezs``, so the caller's DataFrame is left unmodified.

    Args:
        rezs: DataFrame with a ``"Name"`` column.
        rezs_trace_directory: Directory containing the solar trace CSV files.

    Returns:
        dict mapping each REZ name to its matched trace name (or ``None``
        when ``find_best_match`` finds no match).
    """
    csv_file_names = [f for f in os.listdir(rezs_trace_directory) if f.endswith(".csv")]
    csv_file_metadata = [extract_solar_trace_metadata(f) for f in csv_file_names]
    csv_rez_names = [m["name"] for m in csv_file_metadata if m["file_type"] == "area"]
    return {name: find_best_match(name, csv_rez_names) for name in rezs["Name"]}


def draft_wind_generator_to_trace_mapping(
Expand All @@ -118,8 +111,8 @@ def draft_wind_generator_to_trace_mapping(
wind_generators["Station Name"] = wind_generators["Generator"].apply(
lambda x: find_best_match(x, wind_station_names)
)
wind_generators = pd.merge(
wind_generators, wind_duids_and_station_names, how="left", on="Station Name"
wind_generators = wind_generators.merge(
wind_duids_and_station_names, how="left", on="Station Name"
)
wind_generators = wind_generators.drop_duplicates(["Generator"])

Expand All @@ -131,14 +124,12 @@ def draft_wind_generator_to_trace_mapping(
:, ["Generator", "Station Name", "DUID", "CSVFile"]
]

wind_generators = wind_generators.set_index("Generator").to_dict(orient="index")
return wind_generators
return wind_generators.set_index("Generator").to_dict(orient="index")


def draft_wind_rez_mapping(rezs, rezs_trace_directory):
    """Draft a mapping from REZ names to the best-matching wind area trace name.

    Lists the ``.csv`` files in ``rezs_trace_directory``, extracts metadata
    from each filename, keeps the names of files whose ``file_type`` is
    ``"area"``, and matches every entry of ``rezs["Name"]`` against them with
    ``find_best_match``.

    The mapping is built directly rather than by adding a ``CSVFile`` column
    to ``rezs``, so the caller's DataFrame is left unmodified.

    Args:
        rezs: DataFrame with a ``"Name"`` column.
        rezs_trace_directory: Directory containing the wind trace CSV files.

    Returns:
        dict mapping each REZ name to its matched trace name (or ``None``
        when ``find_best_match`` finds no match).
    """
    csv_file_names = [f for f in os.listdir(rezs_trace_directory) if f.endswith(".csv")]
    csv_file_metadata = [extract_wind_trace_metadata(f) for f in csv_file_names]
    csv_rez_names = [m["name"] for m in csv_file_metadata if m["file_type"] == "area"]
    return {name: find_best_match(name, csv_rez_names) for name in rezs["Name"]}
2 changes: 2 additions & 0 deletions noxfile.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Nox configuration file."""

import nox

nox.options.default_venv_backend = "uv"
Expand Down
2 changes: 1 addition & 1 deletion src/isp_trace_parser/construct_reference_year_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,4 @@ def construct_reference_year_mapping(
reference_years = (
reference_years * full_reference_year_cycles
) + reference_years[:partial_cycle_length]
return dict(zip(years, reference_years))
return dict(zip(years, reference_years, strict=True))
21 changes: 11 additions & 10 deletions src/isp_trace_parser/demand_traces.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import functools
import os
from pathlib import Path
from typing import Literal, Optional
from typing import Literal

import polars as pl
from joblib import Parallel, delayed
Expand Down Expand Up @@ -46,15 +46,16 @@ class DemandMetadataFilter(BaseModel):
reference_year: list of ints specifying reference_years
"""

subregion: Optional[list[str]] = None
scenario: Optional[
subregion: list[str] | None = None
scenario: (
list[Literal["Step Change", "Progressive Change", "Green Energy Exports"]]
] = None
poe: Optional[list[Literal["POE50", "POE10"]]] = None
demand_type: Optional[
list[Literal["OPSO_MODELLING", "OPSO_MODELLING_PVLITE", "PV_TOT"]]
] = None
reference_year: Optional[list[int]] = None
| None
) = None
poe: list[Literal["POE50", "POE10"]] | None = None
demand_type: (
list[Literal["OPSO_MODELLING", "OPSO_MODELLING_PVLITE", "PV_TOT"]] | None
) = None
reference_year: list[int] | None = None


@validate_call
Expand Down Expand Up @@ -270,4 +271,4 @@ def extract_metadata_for_all_demand_files(
A dictionary with filepaths as keys and metadata dicts as values.
"""
file_metadata = [extract_demand_trace_metadata(str(f.name)) for f in filenames]
return dict(zip(filenames, file_metadata))
return dict(zip(filenames, file_metadata, strict=True))
56 changes: 28 additions & 28 deletions src/isp_trace_parser/get_data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import datetime
from pathlib import Path
from typing import List, Literal
from typing import Literal

import pandas as pd
import polars as pl
Expand Down Expand Up @@ -37,19 +37,20 @@ def _year_range_to_dt_range(
end_year, 7, 1
)

elif year_type == "calendar":
if year_type == "calendar":
return datetime.datetime(start_year, 1, 1), datetime.datetime(
end_year + 1, 1, 1
)
raise ValueError(year_type)


def _query_parquet_single_reference_year(
start_year: int,
end_year: int,
reference_year: int,
directory: str | Path,
filters: dict[str, any] = None,
select_columns: list[str] = None,
filters: dict[str, any] | None = None,
select_columns: list[str] | None = None,
year_type: Literal["fy", "calendar"] = "fy",
) -> pd.DataFrame:
"""
Expand Down Expand Up @@ -103,14 +104,14 @@ def _query_parquet_single_reference_year(
# Otherwise select all columns
columns_to_select = df_lazy.columns

df = (
dframe = (
df_lazy.filter(filter_expr)
.select(*columns_to_select)
.sort("datetime")
.collect()
)

return df.to_pandas()
return dframe.to_pandas()


def _query_parquet_multiple_reference_years(
Expand All @@ -136,19 +137,18 @@ def _query_parquet_multiple_reference_years(
start_year=year, end_year=year, reference_year=reference_year, **kwargs
)
)
data = pd.concat(data).reset_index(drop=True)
return data
return pd.concat(data).reset_index(drop=True)


@validate_call
def get_project_single_reference_year(
start_year: int,
end_year: int,
reference_year: int,
project: str | List,
project: str | list,
directory: str | Path,
year_type: Literal["fy", "calendar"] = "fy",
select_columns: list[str] = None,
select_columns: list[str] | None = None,
):
"""
Query project trace data for a single reference year.
Expand Down Expand Up @@ -237,11 +237,11 @@ def get_zone_single_reference_year(
start_year: int,
end_year: int,
reference_year: int,
zone: str | List,
resource_type: str | List,
zone: str | list,
resource_type: str | list,
directory: str | Path,
year_type: Literal["fy", "calendar"] = "fy",
select_columns: list[str] = None,
select_columns: list[str] | None = None,
):
"""
Query zone trace data for a single reference year.
Expand Down Expand Up @@ -333,13 +333,13 @@ def get_demand_single_reference_year(
start_year: int,
end_year: int,
reference_year: int,
scenario: str | List,
subregion: str | List,
demand_type: str | List,
poe: str | List,
scenario: str | list,
subregion: str | list,
demand_type: str | list,
poe: str | list,
directory: str | Path,
year_type: Literal["fy", "calendar"] = "fy",
select_columns: list[str] = None,
select_columns: list[str] | None = None,
):
"""
Query demand trace data for a single reference year.
Expand Down Expand Up @@ -441,10 +441,10 @@ def get_demand_single_reference_year(
@validate_call
def get_project_multiple_reference_years(
reference_year_mapping: dict[int, int],
project: str | List,
project: str | list,
directory: str | Path,
year_type: Literal["fy", "calendar"] = "fy",
select_columns: list[str] = None,
select_columns: list[str] | None = None,
):
"""
Query project trace data across multiple reference years.
Expand Down Expand Up @@ -530,11 +530,11 @@ def get_project_multiple_reference_years(
@validate_call
def get_zone_multiple_reference_years(
reference_year_mapping: dict[int, int],
zone: str | List,
resource_type: str | List,
zone: str | list,
resource_type: str | list,
directory: str | Path,
year_type: Literal["fy", "calendar"] = "fy",
select_columns: list[str] = None,
select_columns: list[str] | None = None,
):
"""
Query zone trace data across multiple reference years.
Expand Down Expand Up @@ -623,13 +623,13 @@ def get_zone_multiple_reference_years(
@validate_call
def get_demand_multiple_reference_years(
reference_year_mapping: dict[int, int],
scenario: str | List,
subregion: str | List,
demand_type: str | List,
poe: str | List,
scenario: str | list,
subregion: str | list,
demand_type: str | list,
poe: str | list,
directory: str | Path,
year_type: Literal["fy", "calendar"] = "fy",
select_columns: list[str] = None,
select_columns: list[str] | None = None,
):
"""
Query demand trace data across multiple reference years.
Expand Down
11 changes: 7 additions & 4 deletions src/isp_trace_parser/input_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
def input_directory(path: Path | str) -> Path:
    """Validate that ``path`` refers to an existing directory.

    Args:
        path: Directory location, as a string or ``Path``.

    Returns:
        The value returned by ``is_valid_path(path)``.

    Raises:
        ValueError: If ``is_valid_path`` rejects the input, or if the
            resulting path is not a directory.
    """
    directory = is_valid_path(path)
    if not directory.is_dir():
        msg = f"Directory {directory} does not exist"
        raise ValueError(msg)
    return directory


Expand All @@ -16,9 +17,11 @@ def is_valid_path(path: str | Path) -> Path:
try:
return Path(path)
except (TypeError, ValueError):
raise ValueError(f"Invalid parsed directory path: {path}")
msg = f"Invalid parsed directory path: {path}"
raise ValueError(msg) from None


def start_year_before_end_year(start_year: int, end_year: int) -> None:
    """Check that ``end_year`` is not earlier than ``start_year``.

    Equal years are accepted.

    Args:
        start_year: First year of the range.
        end_year: Last year of the range.

    Raises:
        ValueError: If ``end_year`` is earlier than ``start_year``.
    """
    if end_year < start_year:
        # Report each value under its own label; the previous message had the
        # two years swapped and described the valid ordering, not the error.
        msg = f"Start year {start_year} > end year {end_year}"
        raise ValueError(msg)
13 changes: 8 additions & 5 deletions src/isp_trace_parser/metadata_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ def extract_solar_trace_metadata(filename):
match_data["reference_year"] = int(match_data["reference_year"])
return match_data

raise ValueError(f"Filename '{filename}' does not match the expected pattern")
msg = f"Filename '{filename}' does not match the expected pattern"
raise ValueError(msg)


def extract_wind_trace_metadata(filename):
Expand Down Expand Up @@ -57,7 +58,8 @@ def extract_wind_trace_metadata(filename):
match_data["reference_year"] = int(match_data["reference_year"])
return match_data

raise ValueError(f"Filename '{filename}' does not match the expected pattern")
msg = f"Filename '{filename}' does not match the expected pattern"
raise ValueError(msg)


def extract_demand_trace_metadata(filename):
Expand All @@ -75,6 +77,7 @@ def extract_demand_trace_metadata(filename):
match_data = match.groupdict()
match_data["reference_year"] = int(match_data["reference_year"])
return match_data
else:
# If the pattern does not match, raise an error or return None
raise ValueError(f"Filename '{filename}' does not match the expected pattern")

# The pattern did not match, so reject the filename with a ValueError.
msg = f"Filename '{filename}' does not match the expected pattern"
raise ValueError(msg)
Loading
Loading