From 166d64d51e492562a4c893cb3f8fa7d643e9401f Mon Sep 17 00:00:00 2001 From: Bartosz Wieczorek Date: Tue, 2 Sep 2025 18:14:05 +0200 Subject: [PATCH] init --- .gitignore | 185 ++++++++++++++++++ .../inspectionProfiles/profiles_settings.xml | 6 + .idea/modules.xml | 8 + .idea/ranczo-energy-usage-scraper.iml | 8 + DistributionCost.py | 23 +++ DistributionCostFactory.py | 34 ++++ DistributionCostProvider/TauronG11Provider.py | 14 ++ DistributionCostProvider/TauronG12Provider.py | 33 ++++ .../TauronG12WProvider.py | 16 ++ DistributionCostProvider/TauronG13Provider.py | 68 +++++++ .../TauronG13SProvider.py | 91 +++++++++ DistributionCostProvider/__init__.py | 1 + EnergyPrice.py | 29 +++ EnergyPriceFactory.py | 32 +++ EnergyPriceProvider/DynamicPricesProvider.py | 113 +++++++++++ EnergyPriceProvider/RDNProvider.py | 38 ++++ EnergyPriceProvider/TauronG11Provider.py | 13 ++ EnergyPriceProvider/TauronG12Provider.py | 33 ++++ EnergyPriceProvider/TauronG12WProvider.py | 14 ++ EnergyPriceProvider/TauronG13Provider.py | 72 +++++++ EnergyPriceProvider/__init__.py | 1 + EnergyPriceScraper.py | 163 +++++++++++++++ EnergyPriceScraperFactory.py | 48 +++++ README.md | 19 ++ Scraper/InstratRDN_CSVScraper.py | 78 ++++++++ Scraper/PSE_RCEScraper.py | 99 ++++++++++ Scraper/PstrykScraper.py | 121 ++++++++++++ Scraper/__init__.py | 1 + app.py | 83 ++++++++ app_impl.py | 88 +++++++++ install.sh | 35 ++++ logging_setup.py | 52 +++++ logging_utils.py | 59 ++++++ os/energy-price-scrapers-update.sh | 16 ++ os/energy-price-scrapers.service | 23 +++ plot_cost_breakdown.py | 107 ++++++++++ requirements.txt | 6 + utils/calendar_pl.py | 44 +++++ utils/time_helpers.py | 15 ++ 39 files changed, 1889 insertions(+) create mode 100644 .gitignore create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/ranczo-energy-usage-scraper.iml create mode 100644 DistributionCost.py create mode 100644 DistributionCostFactory.py create mode 100644 
DistributionCostProvider/TauronG11Provider.py create mode 100644 DistributionCostProvider/TauronG12Provider.py create mode 100644 DistributionCostProvider/TauronG12WProvider.py create mode 100644 DistributionCostProvider/TauronG13Provider.py create mode 100644 DistributionCostProvider/TauronG13SProvider.py create mode 100644 DistributionCostProvider/__init__.py create mode 100644 EnergyPrice.py create mode 100644 EnergyPriceFactory.py create mode 100644 EnergyPriceProvider/DynamicPricesProvider.py create mode 100644 EnergyPriceProvider/RDNProvider.py create mode 100644 EnergyPriceProvider/TauronG11Provider.py create mode 100644 EnergyPriceProvider/TauronG12Provider.py create mode 100644 EnergyPriceProvider/TauronG12WProvider.py create mode 100644 EnergyPriceProvider/TauronG13Provider.py create mode 100644 EnergyPriceProvider/__init__.py create mode 100644 EnergyPriceScraper.py create mode 100644 EnergyPriceScraperFactory.py create mode 100644 README.md create mode 100644 Scraper/InstratRDN_CSVScraper.py create mode 100644 Scraper/PSE_RCEScraper.py create mode 100644 Scraper/PstrykScraper.py create mode 100644 Scraper/__init__.py create mode 100644 app.py create mode 100644 app_impl.py create mode 100644 install.sh create mode 100644 logging_setup.py create mode 100644 logging_utils.py create mode 100644 os/energy-price-scrapers-update.sh create mode 100644 os/energy-price-scrapers.service create mode 100644 plot_cost_breakdown.py create mode 100644 requirements.txt create mode 100644 utils/calendar_pl.py create mode 100644 utils/time_helpers.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d7e9d13 --- /dev/null +++ b/.gitignore @@ -0,0 +1,185 @@ +# Created by .ignore support plugin (hsz.mobi) +### Python template +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ 
+sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# IPython Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# dotenv +.env + +# virtualenv +venv/ +ENV/ + +# Spyder project settings +.spyderproject + +# Rope project settings +.ropeproject +### VirtualEnv template +# Virtualenv +# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ +[Bb]in +[Ii]nclude +[Ll]ib +[Ll]ib64 +[Ll]ocal +[Ss]cripts +pyvenv.cfg +.venv +pip-selfcheck.json + +### JetBrains template +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. 
Uncomment if using +# auto-import. +# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# SonarLint plugin +.idea/sonarlint/ + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +# idea folder, uncomment if you don't need it +# .idea \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..2ea3280 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/ranczo-energy-usage-scraper.iml b/.idea/ranczo-energy-usage-scraper.iml new file mode 100644 index 0000000..d9e6024 --- /dev/null +++ b/.idea/ranczo-energy-usage-scraper.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/DistributionCost.py b/DistributionCost.py new file mode 100644 index 0000000..de457d1 --- /dev/null +++ b/DistributionCost.py @@ -0,0 +1,23 @@ +from __future__ import annotations +from dataclasses import dataclass +from datetime import datetime +import zoneinfo + +from utils.time_helpers import WARSAW_TZ + +@dataclass +class DistributionCostBase: + tz: zoneinfo.ZoneInfo = WARSAW_TZ 
+ + def to_local_dt(self, ts: datetime) -> datetime: + if ts.tzinfo is None: + return ts.replace(tzinfo=self.tz) + return ts.astimezone(self.tz) + + def rate(self, ts: datetime) -> float: + """Return PLN/kWh (netto) for given timestamp (override in subclasses).""" + raise NotImplementedError + + def cost(self, ts: datetime, consumption_kwh: float) -> float: + return self.rate(ts) * float(consumption_kwh) + diff --git a/DistributionCostFactory.py b/DistributionCostFactory.py new file mode 100644 index 0000000..4c17dfe --- /dev/null +++ b/DistributionCostFactory.py @@ -0,0 +1,34 @@ +from __future__ import annotations +import importlib +from typing import Any, Type, cast + +from DistributionCost import DistributionCostBase + +def create(name: str, /, **kwargs: Any) -> DistributionCostBase: + """ + Instantiate provider by name using a simple convention: + module: DistributionProvider.Provider + class: Provider + Example: create("TauronG13", rates=...) + -> DistributionCostProvider.TauronG13Provider.TauronG13Provider(...) 
+ """ + safe = "".join(ch for ch in name if ch.isalnum() or ch == "_") + module_name = f"DistributionCostProvider.{safe}Provider" + class_name = f"{safe}Provider" + + try: + mod = importlib.import_module(module_name) + except ModuleNotFoundError as e: + raise ValueError(f"Provider module '{module_name}' not found for name '{name}'.") from e + + try: + cls: Type = getattr(mod, class_name) + except AttributeError as e: + raise ValueError(f"Provider class '{class_name}' not found in '{module_name}'.") from e + + # be sure that this is a subclass of DistributionCostBase + if not issubclass(cls, DistributionCostBase): + raise TypeError(f"{class_name} does not derive from DistributionCostBase") + + ProviderCls = cast(type[DistributionCostBase], cls) + return ProviderCls(**kwargs) # type: ignore[call-arg] diff --git a/DistributionCostProvider/TauronG11Provider.py b/DistributionCostProvider/TauronG11Provider.py new file mode 100644 index 0000000..9ada450 --- /dev/null +++ b/DistributionCostProvider/TauronG11Provider.py @@ -0,0 +1,14 @@ +from __future__ import annotations +from datetime import datetime +from DistributionCost import DistributionCostBase + +from logging_utils import HasLogger + +# ---------- TAURON G11 ---------- +class TauronG11Provider(DistributionCostBase, HasLogger): + def __init__(self): + self._init_logger() + + def rate(self, ts: datetime) -> float: + return 0.504 + diff --git a/DistributionCostProvider/TauronG12Provider.py b/DistributionCostProvider/TauronG12Provider.py new file mode 100644 index 0000000..f9a2344 --- /dev/null +++ b/DistributionCostProvider/TauronG12Provider.py @@ -0,0 +1,33 @@ +from __future__ import annotations +from datetime import time, datetime +from DistributionCost import DistributionCostBase +from utils.time_helpers import in_range_local, TimeRange + + +# ---------- TAURON G12 ---------- +class TauronG12Provider(DistributionCostBase): + """ + Dzień 0,3310 zł/kWh netto + Noc 0,0994 zł/kWh netto + """ + low_1: TimeRange = 
(time(22,0), time(6,0)) # over midnight + low_2: TimeRange = (time(13,0), time(15,0)) + high_1: TimeRange = (time(6,0), time(13,0)) + high_2: TimeRange = (time(15,0), time(22,0)) + + def __init__(self): + self.rates={ + "high" : 0.3310, # + "low" : 0.0994 + } + + def rate(self, ts: datetime) -> float: + dt = self.to_local_dt(ts) + t = dt.time() + + if in_range_local(t, self.low_1) or in_range_local(t, self.low_2): + return self.rates["low"] + if in_range_local(t, self.high_1) or in_range_local(t, self.high_2): + return self.rates["high"] + return self.rates["high"] + diff --git a/DistributionCostProvider/TauronG12WProvider.py b/DistributionCostProvider/TauronG12WProvider.py new file mode 100644 index 0000000..d446f1f --- /dev/null +++ b/DistributionCostProvider/TauronG12WProvider.py @@ -0,0 +1,16 @@ +from __future__ import annotations +from DistributionCostProvider.TauronG12Provider import TauronG12Provider +from utils.calendar_pl import is_weekend_or_holiday + + +# ---------- TAURON G12w ---------- +class TauronG12WProvider(TauronG12Provider): + """ + Like G12 on weekdays; whole weekends & holidays are 'low'. 
+ rates={'low':..., 'high':...} + """ + def rate(self, ts): + dt = self.to_local_dt(ts) + if is_weekend_or_holiday(dt.date()): + return self.rates["low"] + return super().rate(ts) diff --git a/DistributionCostProvider/TauronG13Provider.py b/DistributionCostProvider/TauronG13Provider.py new file mode 100644 index 0000000..f5fd359 --- /dev/null +++ b/DistributionCostProvider/TauronG13Provider.py @@ -0,0 +1,68 @@ +from __future__ import annotations +from datetime import time, date, datetime +from DistributionCost import DistributionCostBase +from utils.time_helpers import in_range_local, TimeRange +from utils.calendar_pl import is_weekend_or_holiday + + +# ---------- TAURON G13 ---------- +class TauronG13Provider(DistributionCostBase): + """ +Szczyt przedpołudniowy + 0,2826 zł/kWh brutto + 0,2298 zł/kWh netto +Szczyt popołudniowy + 0,4645 zł/kWh brutto + 0,3777 zł/kWh netto +Pozostałe godziny + 0,0911 zł/kWh brutto + 0,0741 zł/kWh netto + """ + winter_med: TimeRange = (time(7,0), time(13,0)) + winter_high: TimeRange = (time(16,0), time(21,0)) + winter_low_1: TimeRange = (time(13,0), time(16,0)) + winter_low_2: TimeRange = (time(21,0), time(7,0)) # over midnight + + summer_med: TimeRange = (time(7,0), time(13,0)) + summer_high: TimeRange = (time(19,0), time(22,0)) + summer_low_1: TimeRange = (time(13,0), time(19,0)) + summer_low_2: TimeRange = (time(22,0), time(7,0)) # over midnight + + def __init__(self): + self.rates={ + "high" : 0.3777, # ~0,465 brutto + "med" : 0.2298, + "low" : 0.0741 + } + + def _is_winter(self, d: date) -> bool: + return d.month in (10, 11, 12, 1, 2, 3) + + def rate(self, ts: datetime) -> float: + dt = self.to_local_dt(ts) + d, t = dt.date(), dt.time() + + # weekend/holiday → always 'low' + if is_weekend_or_holiday(d): + return self.rates["low"] + + if self._is_winter(d): + if in_range_local(t, self.winter_high): + key = "high" + elif in_range_local(t, self.winter_med): + key = "med" + elif in_range_local(t, self.winter_low_1) or 
in_range_local(t, self.winter_low_2): + key = "low" + else: + key = "low" + else: + if in_range_local(t, self.summer_high): + key = "high" + elif in_range_local(t, self.summer_med): + key = "med" + elif in_range_local(t, self.summer_low_1) or in_range_local(t, self.summer_low_2): + key = "low" + else: + key = "low" + + return self.rates[key] diff --git a/DistributionCostProvider/TauronG13SProvider.py b/DistributionCostProvider/TauronG13SProvider.py new file mode 100644 index 0000000..cdd03b9 --- /dev/null +++ b/DistributionCostProvider/TauronG13SProvider.py @@ -0,0 +1,91 @@ +from __future__ import annotations +from dataclasses import dataclass +from datetime import time, datetime +from typing import Dict, Literal + +from DistributionCost import DistributionCostBase +from utils.time_helpers import in_range_local +from utils.calendar_pl import is_weekend_or_holiday + +Season = Literal["summer", "winter"] +DayType = Literal["workday", "dayoff"] +Band = Literal["peak", "offpeak_day", "night"] + +RatesDict = Dict[Season, Dict[DayType, Dict[Band, float]]] +@dataclass +class TauronG13SProvider(DistributionCostBase): + """ + TAURON Dystrybucja – taryfa G13s (od 1 lipca 2025). + + Strefy czasowe (Europe/Warsaw): + * Noc: 21:00–07:00 (cały rok) + * Lato: 07:00–09:00 (szczyt), 09:00–17:00 (pozaszczyt), 17:00–21:00 (szczyt) + * Zima: 07:00–10:00 (szczyt), 10:00–15:00 (pozaszczyt), 15:00–21:00 (szczyt) + + Ceny różnią się także rodzajem dnia: + - workday = pon–pt z wyłączeniem świąt, + - dayoff = sobota, niedziela i dni ustawowo wolne od pracy. + + Oczekuje stawek NETTO (PLN/kWh) przekazanych w `rates` według struktury RatesDict. 
+ """ + rates: RatesDict = None # wstrzykuj w konstruktorze + + # stałe zakresy godzin + NIGHT: tuple[time, time] = (time(21, 0), time(7, 0)) # przez północ + + SUMMER_PEAK_1: tuple[time, time] = (time(7, 0), time(9, 0)) + SUMMER_OFFDAY: tuple[time, time] = (time(9, 0), time(17, 0)) + SUMMER_PEAK_2: tuple[time, time] = (time(17, 0), time(21, 0)) + + WINTER_PEAK_1: tuple[time, time] = (time(7, 0), time(10, 0)) + WINTER_OFFDAY: tuple[time, time] = (time(10, 0), time(15, 0)) + WINTER_PEAK_2: tuple[time, time] = (time(15, 0), time(21, 0)) + + def __init__(self): + self.rates= { + "winter": + { + "dayoff": { "peak" : 0.20, "offpeak_day" : 0.12, "night" : 0.11 }, + "workday":{ "peak" : 0.24, "offpeak_day" : 0.2, "night" : 0.11 } + }, + "summer": + { + "dayoff": { "peak": 0.12, "offpeak_day": 0.04, "night": 0.11}, + "workday": { "peak": 0.29, "offpeak_day": 0.1, "night": 0.11 } + } + } + + @staticmethod + def _season(d: datetime.date) -> Season: + return "summer" if 4 <= d.month <= 9 else "winter" + + @staticmethod + def _daytype(d: datetime.date) -> DayType: + return "dayoff" if is_weekend_or_holiday(d) else "workday" + + def _band(self, t: time, season: Season) -> Band: + if in_range_local(t, self.NIGHT): + return "night" + + if season == "summer": + if in_range_local(t, self.SUMMER_PEAK_1) or in_range_local(t, self.SUMMER_PEAK_2): + return "peak" + if in_range_local(t, self.SUMMER_OFFDAY): + return "offpeak_day" + else: # winter + if in_range_local(t, self.WINTER_PEAK_1) or in_range_local(t, self.WINTER_PEAK_2): + return "peak" + if in_range_local(t, self.WINTER_OFFDAY): + return "offpeak_day" + + return "night" + + def rate(self, ts: datetime) -> float: + dt = self.to_local_dt(ts) + season = self._season(dt.date()) + daytype = self._daytype(dt.date()) + band = self._band(dt.time(), season) + try: + return self.rates[season][daytype][band] + except KeyError as e: + raise KeyError(f"no price for [{season}][{daytype}][{band}]") from e diff --git 
a/DistributionCostProvider/__init__.py b/DistributionCostProvider/__init__.py new file mode 100644 index 0000000..66adc29 --- /dev/null +++ b/DistributionCostProvider/__init__.py @@ -0,0 +1 @@ +# empty (kept for package import); discovery is done by the factory via importlib diff --git a/EnergyPrice.py b/EnergyPrice.py new file mode 100644 index 0000000..0a84030 --- /dev/null +++ b/EnergyPrice.py @@ -0,0 +1,29 @@ +# EnergyPrice.py +from __future__ import annotations +from dataclasses import dataclass +from datetime import datetime +import zoneinfo +from utils.time_helpers import WARSAW_TZ + +@dataclass +class EnergyPriceBase: + tz: zoneinfo.ZoneInfo = WARSAW_TZ + + def to_local_dt(self, ts: datetime) -> datetime: + if ts.tzinfo is None: + return ts.replace(tzinfo=self.tz) + return ts.astimezone(self.tz) + + def has_rate(self, ts: datetime) -> bool: + try: + self.rate(ts) + return True + except KeyError: + return False + + def rate(self, ts: datetime) -> float: + """Return PLN/kWh (net) for given timestamp (override in subclasses).""" + raise NotImplementedError + + def cost(self, ts: datetime, consumption_kwh: float) -> float: + return self.rate(ts) * float(consumption_kwh) diff --git a/EnergyPriceFactory.py b/EnergyPriceFactory.py new file mode 100644 index 0000000..f1e75f9 --- /dev/null +++ b/EnergyPriceFactory.py @@ -0,0 +1,32 @@ +# EnergyPriceFactory.py +from __future__ import annotations +import importlib +from typing import Any, cast, Type +from EnergyPrice import EnergyPriceBase + +def create(name: str, /, **kwargs: Any) -> EnergyPriceBase: + """ + Convention: + module: EnergyPriceProvider.Provider + class: Provider + Example: create("TauronG13", rates={...}) + """ + safe = "".join(ch for ch in name if ch.isalnum() or ch == "_") + module_name = f"EnergyPriceProvider.{safe}Provider" + class_name = f"{safe}Provider" + + try: + mod = importlib.import_module(module_name) + except ModuleNotFoundError as e: + raise ValueError(f"Provider module not found: 
{module_name}") from e + + try: + cls = getattr(mod, class_name) + except AttributeError as e: + raise ValueError(f"Provider class not found: {class_name} in {module_name}") from e + + if not issubclass(cls, EnergyPriceBase): + raise TypeError(f"{class_name} must inherit EnergyCostBase") + + ProviderCls = cast(Type[EnergyPriceBase], cls) + return ProviderCls(**kwargs) # type: ignore[arg-type] diff --git a/EnergyPriceProvider/DynamicPricesProvider.py b/EnergyPriceProvider/DynamicPricesProvider.py new file mode 100644 index 0000000..a1a5b02 --- /dev/null +++ b/EnergyPriceProvider/DynamicPricesProvider.py @@ -0,0 +1,113 @@ +from __future__ import annotations +from dataclasses import dataclass, field +from datetime import datetime, timedelta +from typing import Dict, List, Tuple, Optional +from zoneinfo import ZoneInfo +import psycopg +from EnergyPrice import EnergyPriceBase + +WAW = ZoneInfo("Europe/Warsaw") +Interval = Tuple[datetime, datetime, float] # (ts_start, ts_end, price_pln_net) + +@dataclass +class DynamicPricesProvider(EnergyPriceBase): + """ + Bazowa klasa dla cen dynamicznych trzymanych w pricing.energy_prices (Timescale). + - rate(ts): zwraca cenę (PLN/kWh netto) dla podanej chwili. + Przy pierwszym wywołaniu dla danej doby – ładuje CAŁĄ dobę do cache. + - has_rate(ts): domyślnie True (możesz nadpisać w podklasie, jeśli chcesz sprawdzać realną obecność danych). + - Subklasy definiują PROVIDER i KIND (np. 'TGE' / 'fixing_I'). + """ + dsn: Optional[str] = None # np. 
"postgresql://user:pass@localhost:5432/postgres" + conn: Optional[psycopg.Connection] = None # albo podaj gotowe połączenie psycopg3 + table: str = "pricing.energy_prices" + tz: ZoneInfo = WAW + max_cached_days: int = 14 # ile różnych dób trzymać w cache + + # identyfikatory – nadpisujesz w podklasie + SIDE: str = "" # 'buy' albo 'sell' + PROVIDER: str = "" + KIND: str = "" + + # prosty cache: klucz = początek doby (local), wartość = lista interwałów (start, end, price) + _cache: Dict[datetime, List[Interval]] = field(default_factory=dict, init=False, repr=False) + _cache_order: List[datetime] = field(default_factory=list, init=False, repr=False) + + # ---------- public API ---------- + def provider(self) -> str: + if not self.PROVIDER: + raise NotImplementedError("Subclass must define PROVIDER") + return self.PROVIDER + + def kind(self) -> str: + if not self.KIND: + raise NotImplementedError("Subclass must define KIND") + return self.KIND + + def side(self) -> str: + if not self.SIDE: + raise NotImplementedError("Subclass must define SIDE") + return self.SIDE + + def rate(self, ts: datetime) -> float: + """Zwraca cenę netto PLN/kWh dla chwili ts. 
Ładuje cały dzień do cache przy pierwszym wywołaniu.""" + dt = self._to_local(ts) + day_key = self._day_key(dt) + self._ensure_day_cached(day_key) + for start, end, price in self._cache.get(day_key, []): + if start <= dt < end: + return price + raise KeyError(f"No price for {dt.isoformat()} (provider={self.provider()}, kind={self.kind()})") + + def preload_day(self, day: datetime | None = None): + """Opcjonalnie: prefetch doby (początek dnia lokalnie).""" + day_key = self._day_key(self._to_local(day or datetime.now(tz=self.tz))) + self._ensure_day_cached(day_key) + + def clear_cache(self): + self._cache.clear() + self._cache_order.clear() + + # ---------- internals ---------- + def _to_local(self, ts: datetime) -> datetime: + return ts.replace(tzinfo=self.tz) if ts.tzinfo is None else ts.astimezone(self.tz) + + def _day_key(self, ts_local: datetime) -> datetime: + return ts_local.replace(hour=0, minute=0, second=0, microsecond=0) + + def _ensure_conn(self) -> psycopg.Connection: + if self.conn is not None: + return self.conn + if not self.dsn: + raise RuntimeError("Provide dsn= or conn= to DynamicPricesProvider") + self.conn = psycopg.connect(self.dsn) + return self.conn + + def _ensure_day_cached(self, day_start_local: datetime): + if day_start_local in self._cache: + return + self._cache[day_start_local] = self._fetch_day(day_start_local) + self._cache_order.append(day_start_local) + # prosty limit cache po liczbie dni + while len(self._cache_order) > self.max_cached_days: + oldest = self._cache_order.pop(0) + self._cache.pop(oldest, None) + + def _fetch_day(self, day_start_local: datetime) -> List[Interval]: + """Pobiera z DB wszystkie rekordy nachodzące na [day_start, day_start+1d).""" + day_end_local = day_start_local + timedelta(days=1) + sql = f""" + SELECT ts_start, ts_end, price_pln_net + FROM {self.table} + WHERE provider = %s + AND kind = %s + AND side = %s + AND tstzrange(ts_start, ts_end, '[)') && tstzrange(%s::timestamptz, %s::timestamptz, '[)') + 
ORDER BY ts_start + """ + + with self._ensure_conn().cursor() as cur: + cur.execute(sql, (self.provider(), self.kind(), self.side(), day_start_local, day_end_local)) + rows = cur.fetchall() + # rows: List[Tuple[datetime, datetime, Decimal]] + return [(r[0], r[1], float(r[2])) for r in rows] \ No newline at end of file diff --git a/EnergyPriceProvider/RDNProvider.py b/EnergyPriceProvider/RDNProvider.py new file mode 100644 index 0000000..9577418 --- /dev/null +++ b/EnergyPriceProvider/RDNProvider.py @@ -0,0 +1,38 @@ +from __future__ import annotations +from dataclasses import dataclass +from datetime import datetime, timedelta +from typing import Callable, Mapping, Optional +from EnergyPrice import EnergyPriceBase + +PriceLookup = Callable[[datetime], Optional[float]] + +@dataclass +class RDNProvider(EnergyPriceBase): + """ + Skeleton for market-based energy cost (RDN). + Choose ONE feeding strategy: + 1) price_lookup: callable(ts_local) -> PLN/kWh (net) or None + 2) mapping: dict/Mapping[datetime->PLN/kWh] with tz-aware keys (Europe/Warsaw) + Optional: period controls how you round/align (default: 1h). 
+ """ + price_lookup: PriceLookup | None = None + mapping: Mapping[datetime, float] | None = None + period: timedelta = timedelta(hours=1) + + def _get_price(self, ts_local: datetime) -> float: + if self.price_lookup: + val = self.price_lookup(ts_local) + if val is None: + raise KeyError(f"No RDN price for {ts_local.isoformat()}") + return float(val) + if self.mapping is not None: + # align to exact period start (e.g., hour start) + aligned = ts_local.replace(minute=0, second=0, microsecond=0) + if aligned in self.mapping: + return float(self.mapping[aligned]) + raise KeyError(f"No RDN price in mapping for {aligned.isoformat()}") + raise RuntimeError("RDNProvider needs price_lookup or mapping") + + def rate(self, ts: datetime) -> float: + dt = self.to_local_dt(ts) + return self._get_price(dt) diff --git a/EnergyPriceProvider/TauronG11Provider.py b/EnergyPriceProvider/TauronG11Provider.py new file mode 100644 index 0000000..e2cf024 --- /dev/null +++ b/EnergyPriceProvider/TauronG11Provider.py @@ -0,0 +1,13 @@ +from __future__ import annotations +from EnergyPrice import EnergyPriceBase +from utils.calendar_pl import gov_energy_prices_shield + +class TauronG11Provider(EnergyPriceBase): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def rate(self, ts): + if gov_energy_prices_shield(ts): + return 0.62/1.23 + else: + raise RuntimeError("brak danych dla nie tarczy, trzeba ogarnąć") diff --git a/EnergyPriceProvider/TauronG12Provider.py b/EnergyPriceProvider/TauronG12Provider.py new file mode 100644 index 0000000..781cd1e --- /dev/null +++ b/EnergyPriceProvider/TauronG12Provider.py @@ -0,0 +1,33 @@ +from __future__ import annotations +from datetime import time, datetime +from EnergyPrice import EnergyPriceBase +from utils.time_helpers import in_range_local +from utils.calendar_pl import gov_energy_prices_shield + +class TauronG12Provider(EnergyPriceBase): + low1 = (time(22,0), time(6,0)) + low2 = (time(13,0), time(15,0)) + high1 = (time(6,0), time(13,0)) + 
high2 = (time(15,0), time(22,0)) + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + @staticmethod + def _rates(ts: datetime): + if gov_energy_prices_shield(ts): + tarcza_rates = 0.62 / 1.23 + return { + "low": 0.57/1.23, "high": tarcza_rates + } + else: + raise RuntimeError("brak danych dla nie tarczy, trzeba ogarnąć") + + def rate(self, ts): + rates = self._rates(ts) + t = self.to_local_dt(ts).time() + if in_range_local(t, self.low1) or in_range_local(t, self.low2): + return rates["low"] + if in_range_local(t, self.high1) or in_range_local(t, self.high2): + return rates["high"] + return rates["high"] diff --git a/EnergyPriceProvider/TauronG12WProvider.py b/EnergyPriceProvider/TauronG12WProvider.py new file mode 100644 index 0000000..0df5639 --- /dev/null +++ b/EnergyPriceProvider/TauronG12WProvider.py @@ -0,0 +1,14 @@ +from __future__ import annotations +from EnergyPriceProvider.TauronG12Provider import TauronG12Provider +from utils.calendar_pl import is_weekend_or_holiday + +class TauronG12WProvider(TauronG12Provider): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def rate(self, ts): + dt = self.to_local_dt(ts) + if is_weekend_or_holiday(dt.date()): + rates = self._rates(ts) + return rates["low"] + return super().rate(ts) diff --git a/EnergyPriceProvider/TauronG13Provider.py b/EnergyPriceProvider/TauronG13Provider.py new file mode 100644 index 0000000..f886bfc --- /dev/null +++ b/EnergyPriceProvider/TauronG13Provider.py @@ -0,0 +1,72 @@ +from __future__ import annotations +from datetime import time, datetime +from EnergyPrice import EnergyPriceBase +from utils.time_helpers import in_range_local +from utils.calendar_pl import is_weekend_or_holiday, gov_energy_prices_shield + +class TauronG13Provider(EnergyPriceBase): + """ + Energy cost – Tauron G13 (NET PLN/kWh). 
+ Zima (X–III): + medium: 07–13 + high: 16–21 + low: 13–16, 21–07; weekend/święta = low + Lato (IV–IX): + medium: 07–13 + high: 19–22 + low: 13–19, 22–07; weekend/święta = low + """ + def __init__(self, **kwargs): + super().__init__(**kwargs) + + self.w_med = (time(7,0), time(13,0)) + self.w_high = (time(16,0), time(21,0)) + self.w_low1 = (time(13,0), time(16,0)) + self.w_low2 = (time(21,0), time(7,0)) + + self.s_med = (time(7,0), time(13,0)) + self.s_high = (time(19,0), time(22,0)) + self.s_low1 = (time(13,0), time(19,0)) + self.s_low2 = (time(22,0), time(7,0)) + + @staticmethod + def _rates(ts: datetime): + if gov_energy_prices_shield(ts): + tarcza_rates = 0.62/1.23 + return { + "summer": { # IV–IX + "workday": {"low": tarcza_rates, "medium": tarcza_rates, "high": tarcza_rates}, + "dayoff": {"low": tarcza_rates, "medium": tarcza_rates, "high": tarcza_rates}, + }, + "winter": { # X–III + "workday": {"low": tarcza_rates, "medium": tarcza_rates, "high": tarcza_rates}, + "dayoff": {"low": tarcza_rates, "medium": tarcza_rates, "high": tarcza_rates}, + }, + } + else: + raise RuntimeError("brak danych dla nie tarczy, trzeba ogarnąć") + + @staticmethod + def _is_winter(month: int) -> bool: + return month in (10, 11, 12, 1, 2, 3) + + def rate(self, ts): + dt = self.to_local_dt(ts) + d, t = dt.date(), dt.time() + rates = self._rates(ts) + + if is_weekend_or_holiday(d): + return rates["low"] + + if self._is_winter(d.month): + if in_range_local(t, self.w_high): key = "high" + elif in_range_local(t, self.w_med): key = "medium" + elif in_range_local(t, self.w_low1) or in_range_local(t, self.w_low2): key = "low" + else: key = "low" + else: + if in_range_local(t, self.s_high): key = "high" + elif in_range_local(t, self.s_med): key = "medium" + elif in_range_local(t, self.s_low1) or in_range_local(t, self.s_low2): key = "low" + else: key = "low" + + return rates[key] diff --git a/EnergyPriceProvider/__init__.py b/EnergyPriceProvider/__init__.py new file mode 100644 index 
0000000..4c29be4 --- /dev/null +++ b/EnergyPriceProvider/__init__.py @@ -0,0 +1 @@ +# (left intentionally empty; factory imports modules dynamically) diff --git a/EnergyPriceScraper.py b/EnergyPriceScraper.py new file mode 100644 index 0000000..984bda4 --- /dev/null +++ b/EnergyPriceScraper.py @@ -0,0 +1,163 @@ +from __future__ import annotations +from dataclasses import dataclass +from datetime import datetime, date, timedelta, timezone +from typing import Iterable, List, Tuple, Dict, Any, Optional +from zoneinfo import ZoneInfo +import json +import psycopg +from utils.time_helpers import WARSAW_TZ + +IntervalRow = Tuple[datetime, datetime, float, str, str, str, str, str, str] # patrz _rows_to_upsert + +UPSERT_SQL = """ +INSERT INTO pricing.energy_prices + (ts_start, ts_end, price_pln_net, provider, kind, side, buyer, seller, source_meta) +VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s) +ON CONFLICT (ts_start, ts_end, provider, kind, side) +DO UPDATE SET + price_pln_net = EXCLUDED.price_pln_net, + buyer = EXCLUDED.buyer, + seller = EXCLUDED.seller, + source_meta = COALESCE(pricing.energy_prices.source_meta, '{}'::jsonb) + || COALESCE(EXCLUDED.source_meta, '{}'::jsonb), + inserted_at = now() +WHERE + pricing.energy_prices.price_pln_net IS DISTINCT FROM EXCLUDED.price_pln_net + OR pricing.energy_prices.buyer IS DISTINCT FROM EXCLUDED.buyer + OR pricing.energy_prices.seller IS DISTINCT FROM EXCLUDED.seller + OR (COALESCE(pricing.energy_prices.source_meta, '{}'::jsonb) + || COALESCE(EXCLUDED.source_meta, '{}'::jsonb)) + IS DISTINCT FROM COALESCE(pricing.energy_prices.source_meta, '{}'::jsonb) +RETURNING 1 AS affected, (xmax <> 0) AS was_update;""" + +SELECT_LAST = """ +select ts_end from pricing.energy_prices ep +where + ep.provider = %s + and ep.kind = %s + and ep.side = %s::price_side + and ep.buyer = %s + and ep.seller = %s +order by ts_start desc +limit 1 +""" + +@dataclass +class EnergyPriceScraperBase: + """Bazowa klasa dla scraperów rynkowych (zbieranie → normalizacja → 
UPSERT)."""
+    # Connection config: supply either a DSN string or an already-open connection.
+    dsn: Optional[str] = None
+    conn: Optional[psycopg.Connection] = None
+    tz: ZoneInfo = WARSAW_TZ
+    # Default duration of one price point (one row per hour).
+    period: timedelta = timedelta(hours=1)
+    # identifiers — OVERRIDE in subclasses:
+    PROVIDER: str = ""  # e.g. 'PSE' / 'instrat' / 'PSTRYK'
+    KIND: str = ""  # e.g. 'rce' / 'fixing_I' / 'market_price'
+    SIDE: str = ""  # 'buy'|'sell'
+    BUYER: str = ""  # 'end_user'
+    SELLER: str = ""  # 'market_index'
+    # No annotation, so this is a shared class attribute, not a dataclass field.
+    # NOTE(review): presumably replaced per-instance by the HasLogger mixin in
+    # subclasses — confirm; base methods dereference self.log unconditionally.
+    log = None
+
+    # throttling/retry
+    max_retries: int = 3
+    backoff_sec: float = 1.0
+
+    # ---------- identity accessors (fail fast when a subclass forgot one) ----------
+    def provider(self) -> str:
+        """Return PROVIDER, raising if the subclass did not set it."""
+        if not self.PROVIDER:
+            raise NotImplementedError("Subclass must define PROVIDER")
+        return self.PROVIDER
+
+    def kind(self) -> str:
+        """Return KIND, raising if the subclass did not set it."""
+        if not self.KIND:
+            raise NotImplementedError("Subclass must define KIND")
+        return self.KIND
+
+    def side(self) -> str:
+        """Return SIDE, raising if the subclass did not set it."""
+        if not self.SIDE:
+            raise NotImplementedError("Subclass must define SIDE")
+        return self.SIDE
+
+    def buyer(self) -> str:
+        """Return BUYER, raising if the subclass did not set it."""
+        if not self.BUYER:
+            raise NotImplementedError("Subclass must define BUYER")
+        return self.BUYER
+
+    def seller(self) -> str:
+        """Return SELLER, raising if the subclass did not set it."""
+        if not self.SELLER:
+            raise NotImplementedError("Subclass must define SELLER")
+        return self.SELLER
+
+    # ---------- public API ----------
+    def ingest_day(self, business_day: date) -> int:
+        """Pobiera i zapisuje całą dobę [00:00, 24:00) lokalnie. 
Zwraca liczbę upsertowanych wierszy.""" + self.log.info("Ingesting day {}".format(business_day)) + points = self.fetch_day(business_day, WARSAW_TZ) + rows = self._rows_to_upsert(points) + self.log.debug("{} rows inserted".format(len(rows))) + return self._upsert(rows) + + def last_entry(self) -> Optional[datetime]: + self.log.info("Retrieving last entry") + with self._ensure_conn().cursor() as cur: + params = (self.provider(), self.kind(), self.side(), self.buyer(), self.seller()) + cur.execute(SELECT_LAST, params) + res = cur.fetchone() # None if no change, tuple if insert/update + if res is not None: + self.log.debug("last entry {}".format(res)) + return res[0] + + self.log.debug("No last entry") + return None + + def fetch_day(self, business_day: date, tz: timezone) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]: + """Zaimplementuj w podklasie. Zwracaj listę punktów z NETTO PLN/kWh.""" + raise NotImplementedError + + # ---------- helpers ---------- + def _ensure_conn(self) -> psycopg.Connection: + if self.conn: + return self.conn + if not self.dsn: + raise RuntimeError("Podaj dsn= lub conn= dla PriceScraperBase") + self.conn = psycopg.connect(self.dsn) + return self.conn + + def _rows_to_upsert(self, points: Iterable[Tuple[datetime, datetime, float, Dict[str, Any]]]) -> List[IntervalRow]: + rows: List[IntervalRow] = [] + self.log.debug("Creating upsert rows from {} points".format(len(points))) + for ts_start, ts_end, price_pln_kwh_net, meta in points: + # force timezones + assert(ts_start.tzname() is not None) + assert(ts_end.tzname() is not None) + + rows.append(( + ts_start, ts_end, float(price_pln_kwh_net), + self.provider(), self.kind(), self.side(), self.buyer(), self.seller(), + json.dumps(meta or {}) + )) + return rows + + def _upsert(self, rows: list[IntervalRow]) -> int: + if not rows: + return 0 + + affected = 0 + updated = 0 + + self.log.info("Upserting %d rows ", len(rows)) + with self._ensure_conn().cursor() as cur: + for r in rows: + 
cur.execute(UPSERT_SQL, r) + res = cur.fetchone() # None if no change, tuple if insert/update + if res is not None: + affected += res[0] # res[0] == 1 + updated += int(bool(res[1])) # was_update -> 1/0 + + self._ensure_conn().commit() + self.log.info("Affected=%d (updated=%d, inserted=%d)", affected, updated, affected - updated) + return affected + + def _day_range(self, d: date) -> Tuple[datetime, datetime]: + start = datetime(d.year, d.month, d.day, 0, 0, tzinfo=self.tz) + return start, start + timedelta(days=1) + diff --git a/EnergyPriceScraperFactory.py b/EnergyPriceScraperFactory.py new file mode 100644 index 0000000..6b1b0d8 --- /dev/null +++ b/EnergyPriceScraperFactory.py @@ -0,0 +1,48 @@ +# EnergyPriceScraperFactory.py +from __future__ import annotations +import importlib +from typing import Any, cast, Type +from EnergyPriceScraper import EnergyPriceScraperBase + +import os +import psycopg + +DB_HOST = os.getenv("PGHOST", "192.168.30.10") +DB_PORT = int(os.getenv("PGPORT", "5432")) +DB_NAME = os.getenv("PGDATABASE", "postgres") +DB_USER = os.getenv("PGUSER", "energy_ingest") +DB_PASS = os.getenv("PGPASSWORD", "2f1rLCa03mQrbmlCbD6envk") + +def setup_db(): + # psycopg 3 + conn = psycopg.connect( + host=DB_HOST, port=DB_PORT, dbname=DB_NAME, user=DB_USER, password=DB_PASS + ) + return conn + +def create(name: str, /, **kwargs: Any) -> EnergyPriceScraperBase: + """ + Convention: + module: Scraper.Scraper + class: Provider + Example: create("TauronG13", rates={...}) + """ + safe = "".join(ch for ch in name if ch.isalnum() or ch == "_") + module_name = f"Scraper.{safe}Scraper" + class_name = f"{safe}Scraper" + + try: + mod = importlib.import_module(module_name) + except ModuleNotFoundError as e: + raise ValueError(f"Scraper module not found: {module_name}") from e + + try: + cls = getattr(mod, class_name) + except AttributeError as e: + raise ValueError(f"Scraper class not found: {class_name} in {module_name}") from e + + if not issubclass(cls, 
EnergyPriceScraperBase): + raise TypeError(f"{class_name} must inherit PriceScraperBase") + + ProviderCls = cast(Type[EnergyPriceScraperBase], cls) + return ProviderCls(**kwargs) # type: ignore[arg-type] diff --git a/README.md b/README.md new file mode 100644 index 0000000..2de9fae --- /dev/null +++ b/README.md @@ -0,0 +1,19 @@ +sudo apt update +sudo apt install -y python3-venv python3-pip git tzdata + +# system user without shell login +sudo useradd -r -s /usr/sbin/nologin -d /opt/energy-scrapers energy + +copy service to +/etc/systemd/system/energy-price-scrapers.service + +sudo install -m 0755 os/energy-price-scrapers-update.sh /usr/local/bin/energy-scrapers-update + +as root + +systemctl daemon-reload +systemctl enable --now energy-price-scrapers.service +systemctl status energy-price-scrapers.service +~~~~ +logi +journalctl -u energy-price-scrapers.service -f diff --git a/Scraper/InstratRDN_CSVScraper.py b/Scraper/InstratRDN_CSVScraper.py new file mode 100644 index 0000000..80c4755 --- /dev/null +++ b/Scraper/InstratRDN_CSVScraper.py @@ -0,0 +1,78 @@ +from __future__ import annotations +from datetime import datetime, timedelta, date +from typing import List, Tuple, Dict, Any +import pandas as pd +from EnergyPriceScraper import EnergyPriceScraperBase +from utils.time_helpers import WARSAW_TZ, UTC + +# CSV pobrane z https://energy.instrat.pl/ceny/energia-rdn-godzinowe/ +class InstratRDN_CSVScraper(EnergyPriceScraperBase): + """ + Przykładowy scraper RDN z CSV/JSON (public HTTP). + Oczekuje CSV z kolumnami: 'date', 'fixing_i_price' (PLN/MWh) lub już PLN/kWh. 
+ """ + url: str + + def __init__(self, path: str, **kwargs): + super().__init__(**kwargs) + + self.PROVIDER = "instrat" + self.KIND = "fixing_I" + self.SIDE = "buy" + self.BUYER = "end_user" # sprzedawca rozliczajacy prosumenta + self.SELLER = "market_index" + + self.data = self.load_instrat_csv(path) + + def load_instrat_csv(self, path: str) -> pd.DataFrame: + """ + Wczytuje CSV Instrat z format + date,fixing_i_price,fixing_i_volume,fixing_ii_price,fixing_ii_volume + 01.01.2016 00:00,108.27,2565.10,108.55,89.10 + """ + # 1) Wczytanie z autodetekcją polskiego formatu + dateparse = lambda x: datetime.strptime(x, '%d.%m.%Y %H:%M').replace(tzinfo=WARSAW_TZ) + df = pd.read_csv(path, parse_dates=['date'], date_parser=dateparse) + + fi_pln_kwh = (df["fixing_i_price"] / 1000.0).round(4) + fii_pln_kwh = (df["fixing_ii_price"] / 1000.0).round(4) + + self.out = pd.DataFrame({ + "fixing_i_pln_kwh": fi_pln_kwh.values, + "fixing_ii_pln_kwh": fii_pln_kwh.values, + "fixing_i_volume": pd.to_numeric(df.get("fixing_i_volume"), errors="coerce").values, + "fixing_ii_volume": pd.to_numeric(df.get("fixing_ii_volume"), errors="coerce").values, + }, index=df["date"]).sort_index() + + # sanity check — nie wyszło pusto + if self.out[["fixing_i_pln_kwh", "fixing_ii_pln_kwh"]].notna().sum().sum() == 0: + raise RuntimeError("Brak cen po przeliczeniu — sprawdź separator/format liczb w CSV.") + + def fetch_day(self, business_day: date) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]: + if not hasattr(self, "out"): + raise RuntimeError("Brak danych: najpierw wczytaj CSV i zbuduj self.out") + + # wybór kolumny wg KIND (domyślnie Fixing I) + kind = getattr(self, "KIND", "fixing_I") + kind_norm = str(kind).replace(" ", "_").lower() + if "fixing_ii" in kind_norm: + col = "fixing_ii_pln_kwh" + fixing_tag = "II" + else: + col = "fixing_i_pln_kwh" + fixing_tag = "I" + + day_start = datetime(business_day.year, business_day.month, business_day.day, 0, 0, tzinfo=self.tz) + day_end = day_start + 
timedelta(days=1) + + df_day = self.out.loc[(self.out.index >= day_start) & (self.out.index < day_end)] + if col not in df_day.columns: + raise KeyError(f"Column '{col}' does not exists") + + points: List[Tuple[datetime, datetime, float, Dict[str, Any]]] = [] + for ts, price in df_day[col].dropna().items(): + ts_end = ts + getattr(self, "period", timedelta(hours=1)) + points.append((ts.to_pydatetime().astimezone(UTC), ts_end.to_pydatetime().astimezone(UTC), float(price), + {"source": "instrat_csv", "unit": "PLN/kWh", "fixing": fixing_tag, "taxes_included": False})) + return points + diff --git a/Scraper/PSE_RCEScraper.py b/Scraper/PSE_RCEScraper.py new file mode 100644 index 0000000..f598971 --- /dev/null +++ b/Scraper/PSE_RCEScraper.py @@ -0,0 +1,99 @@ +from __future__ import annotations +from datetime import datetime, timedelta, date, timezone +from typing import List, Tuple, Dict, Any +import requests + +from EnergyPriceScraper import EnergyPriceScraperBase +from utils.time_helpers import UTC, WARSAW_TZ + +from logging_utils import HasLogger + + +class PSE_RCEScraper(EnergyPriceScraperBase, HasLogger): + """ + PSE RCE (PLN) – godziny dla danej doby. + Zwraca NETTO PLN/kWh (jeżeli RCE jest w PLN/MWh, dzielimy przez 1000). 
+ """ + api_url: str = "https://api.raporty.pse.pl/api/rce-pln" + session: requests.Session + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + self.PROVIDER = "PSE" + self.KIND = "rce" + self.SIDE = "sell" + self.BUYER = "reteiler" + self.SELLER = "prosumer" + + self.init_logger() + + self.session = requests.Session() + self.session.headers.update({"accept": "application/json"}) + self.log.info("Initializing PSE RCE Done") + + def fetch_range(self, start_date: datetime, end_date: datetime) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]: + assert start_date < end_date + assert start_date.tzinfo is not None + assert end_date.tzinfo is not None + assert start_date.tzinfo == UTC + assert end_date.tzinfo == UTC + assert end_date - start_date == timedelta(days=1) # for now no way to import more than one day + + self.log.info(f"Fetching range: [{start_date}, {end_date}) UTC / [{start_date.astimezone(WARSAW_TZ)}, {end_date.astimezone(WARSAW_TZ)}) Europe/Warsaw") + business_day = start_date.astimezone(WARSAW_TZ).date() + self.log.debug(f"business_day: {business_day}") + + # RCE v2: filter by business_date, select rce_pln,dtime,period + params = { + "$select": "rce_pln,publication_ts_utc,dtime_utc,business_date", + "$filter": f"business_date eq '{business_day:%Y-%m-%d}'", + } + r = self.session.get(self.api_url, params=params, timeout=30) + r.raise_for_status() + data = r.json().get("value", []) + self.log.debug(f"Fetched data len: {len(data)} points") + + out: List[Tuple[datetime, datetime, float, Dict[str, Any]]] = [] + for item in data: + out.append(self.parse_pse_rce_record(item)) + + return out + + def fetch_day(self, business_day: date, tz: timezone) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]: + start = datetime( + year=business_day.year, + month=business_day.month, + day=business_day.day, + hour=0, + minute=0, + second=0, + tzinfo=tz + ).astimezone(UTC) + end = start + timedelta(days=1) + return self.fetch_range(start, end) + 
+ @staticmethod + def PSE_date_range(dtime: datetime): + ts_end = dtime + ts_start = dtime - timedelta(minutes=15) + + return ts_start, ts_end + + @staticmethod + def parse_pse_rce_record(rec: dict): + # 'dtime' date is the END of timeslot, so begining is dtime - t_stop + dtime_utc = datetime.strptime(rec["dtime_utc"], "%Y-%m-%d %H:%M:%S").replace(tzinfo=UTC) + ts_start, ts_end = PSE_RCEScraper.PSE_date_range(dtime=dtime_utc) + + price_pln_mwh = float(rec["rce_pln"]) + price_pln_kwh = price_pln_mwh / 1000.0 + + meta = { + "business_date": rec["business_date"], + "source": "PSE_RCE", + "publication_ts_utc": rec["publication_ts_utc"], + "unit": "PLN/kWh", + "taxes_included": False + } + return ts_start, ts_end, price_pln_kwh, meta diff --git a/Scraper/PstrykScraper.py b/Scraper/PstrykScraper.py new file mode 100644 index 0000000..75234f1 --- /dev/null +++ b/Scraper/PstrykScraper.py @@ -0,0 +1,121 @@ +from __future__ import annotations +from datetime import datetime, timedelta, date, timezone +from typing import List, Tuple, Dict, Any, Optional +import os +import requests +from EnergyPriceScraper import EnergyPriceScraperBase + +from logging_utils import HasLogger +from utils.time_helpers import UTC, WARSAW_TZ + + +class PstrykScraper(EnergyPriceScraperBase, HasLogger): + """ + Szablon: ceny publikowane przez sprzedawcę (Pstryk). + Załóż: Bearer token w ENV PSTRYK_TOKEN, endpoint w ENV PSTRYK_API_BASE, np.: + PSTRYK_API_BASE=https://api.pstryk.example.com + Endpoint (przykład): GET /prices?date=YYYY-MM-DD + -> [{"ts":"2025-08-27T00:00:00+02:00","net_pln_kwh":0.44}, ...] 
+ """ + api_base: str + token: str + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + self.PROVIDER = "PSTRYK" + self.KIND = "market_price" + self.SIDE = "buy" + self.BUYER = "end_user" + self.SELLER = "PSTRYK" + + self.init_logger() + + self.api_base = os.getenv("PSTRYK_API_BASE", "https://api.pstryk.pl/").rstrip("/") + self.token = os.getenv("PSTRYK_TOKEN", "sk-QLX1AHLF83X15VWPYRUD5G87BK8DBF0SS9XLWQ8R") + if not self.api_base or not self.token: + raise RuntimeError("Ustaw PSTRYK_API_BASE i PSTRYK_TOKEN w środowisku.") + self.session = requests.Session() + self.session.headers.update({ + "accept": "application/json", + "Authorization": f"{self.token}", + "user-agent": "energy-scraper/1.0", + }) + + self.log.debug("Initializing PSTRYK Done") + + def fetch_range(self, start_date: datetime, end_date: datetime) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]: + assert start_date < end_date + assert start_date.tzinfo is not None + assert end_date.tzinfo is not None + assert start_date.tzinfo == UTC + assert end_date.tzinfo == UTC + + url = f"{self.api_base}/integrations/pricing" + self.log.debug(f"Fetching url: {url}") + self.log.info(f"Fetching range: [{start_date}, {end_date}) UTC / [{start_date.astimezone(WARSAW_TZ)}, {end_date.astimezone(WARSAW_TZ)}) Europe/Warsaw") + r = self.session.get(url, params= + { + "resolution": "hour", + "window_start":start_date.strftime("%Y-%m-%dT%H:%M:%SZ"), + "window_end": end_date.strftime("%Y-%m-%dT%H:%M:%SZ"), + }, timeout=30) + r.raise_for_status() + data = r.json() + + out: List[Tuple[datetime, datetime, float, Dict[str, Any]]] = [] + self.log.debug(f"Fetched {len(data['frames'])} data frames for [{start_date}, {end_date}) UTC") + for frame in data['frames']: + row = self.parse_generic_price_frame(frame) + if row is not None: + out.append(row) + return out + + def fetch_day(self, business_day: date, tz: timezone) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]: + start = datetime( + 
year=business_day.year, + month=business_day.month, + day=business_day.day, + hour=0, + minute=0, + second=0, + tzinfo=tz + ).astimezone(UTC) + + end = start + timedelta(days=1) + return self.fetch_range(start, end) + + def parse_generic_price_frame(self, rec: dict): + """ + Wejście (przykład): + {'start':'2025-09-01T00:00:00+00:00','end':'2025-09-01T01:00:00+00:00', + 'price_net':0.37,'price_gross':0.65,'is_cheap':True,'is_expensive':False} + Wyjście: + (ts_start_utc, ts_end_utc, price_pln_kwh_net, meta) + """ + if rec.get("is_cheap") is None or rec.get("is_expensive") is None: + self.log.info(f"Ignoring non-valid price frame {rec}") + return None + + try: + ts_start = datetime.fromisoformat(rec["start"]).astimezone(UTC) + ts_end = datetime.fromisoformat(rec["end"]).astimezone(UTC) + except Exception as e: + raise ValueError(f"Bad iso timeformat in 'start'/'end': {e}") from e + + if ts_end <= ts_start: + raise ValueError(f"Bad range: start={ts_start.isoformat()} end={ts_end.isoformat()}") + + try: + price_pln_kwh_net = float(rec["price_net"]) + except Exception as e: + raise ValueError(f"Price net not available 'price_net': {e}") from e + + + meta = { + "unit": "PLN/kWh", + "taxes_included": False, + "is_cheap": bool(rec.get("is_cheap") ), + "is_expensive": bool(rec.get("is_expensive")) + } + return ts_start, ts_end, price_pln_kwh_net, meta diff --git a/Scraper/__init__.py b/Scraper/__init__.py new file mode 100644 index 0000000..4c29be4 --- /dev/null +++ b/Scraper/__init__.py @@ -0,0 +1 @@ +# (left intentionally empty; factory imports modules dynamically) diff --git a/app.py b/app.py new file mode 100644 index 0000000..b5066b8 --- /dev/null +++ b/app.py @@ -0,0 +1,83 @@ +import os +import logging +from datetime import datetime, timedelta +from zoneinfo import ZoneInfo +from apscheduler.schedulers.blocking import BlockingScheduler +from apscheduler.triggers.cron import CronTrigger + +import EnergyPriceScraperFactory + +from logging_setup import setup_logging +from 
utils.time_helpers import WARSAW_TZ + +from app_impl import hourly_tick +from logging_utils import function_logger + +setup_logging(logging.DEBUG) +sched = BlockingScheduler(timezone=WARSAW_TZ) + +def run_pstryk(): + log = function_logger() + log.info("Running PSTRYK") + try: + conn = EnergyPriceScraperFactory.setup_db() + pstryk_scraper = EnergyPriceScraperFactory.create("Pstryk", conn=conn) + # 11:30 and hourly safety check + hourly_tick(pstryk_scraper) + except Exception as e: + log.error(f"PSTRYK throw an exception: {e}") + + +def run_PSE_RCE(): + log = function_logger() + log.info("Running PSE-RCE") + try: + conn = EnergyPriceScraperFactory.setup_db() + scraper = EnergyPriceScraperFactory.create("PSE_RCE", conn=conn) + hourly_tick(scraper) + except Exception as e: + log.error(f"PSE-RCE throw an exception: {e}") + + +# Daily triggers at exact local times (DST-safe) +sched.add_job(run_pstryk, 'cron', hour='*', minute='30') +sched.add_job(run_PSE_RCE, 'cron', hour='*', minute='30') + +sched.start() + +# if __name__ == "__main__": +# setup_logging(logging.DEBUG) +# +# conn = EnergyPriceScraperFactory.setup_db() +# # scraper = EnergyPriceScraperFactory.create("InstratRDN_CSV", conn=conn, path=path) +# # scraper = EnergyPriceScraperFactory.create("PSE_RCE", conn=conn) +# scraper = EnergyPriceScraperFactory.create("Pstryk", conn=conn) + + # distribution_cost = DistributionCostFactory.create("TauronG13") + # energy_price = DynamicPricesProvider.DynamicPricesProvider(PROVIDER="instrat", conn=conn, KIND="fixing I", SIDE="buy") + # + # start = datetime(2025, 6, 27, 0, 0) # lokalny czas PL (jeśli naive – funkcja sama doda WAW) + # + # rows = [] + # for h in range(24*3): + # ts = start + timedelta(hours=h) + # dist = distribution_cost.rate(ts) + # price = energy_price.rate(ts) + # pstryk_fee = 0.08 + # + # rows.append({ + # "ts": ts, + # "Dystrybucja (net)": dist, + # "Energia (net)": price, + # "Pstryk (net)" : pstryk_fee, + # "Podatki": (distribution_cost.rate(ts) + 
price + pstryk_fee) * 1.23 - (distribution_cost.rate(ts) + price + pstryk_fee), + # }) + # + # fig, ax = plot_stacked_with_negatives( + # rows, + # title="Składowe ceny 1 kWh", + # order=["Dystrybucja (net)", "Energia (net)", "Podatki"] # opcjonalnie + # ) + # + # plt.show() + diff --git a/app_impl.py b/app_impl.py new file mode 100644 index 0000000..fd83ac5 --- /dev/null +++ b/app_impl.py @@ -0,0 +1,88 @@ +from __future__ import annotations +from datetime import datetime, date, time, timedelta, timezone +from zoneinfo import ZoneInfo +from typing import Sequence, Tuple, Optional + +from logging_utils import function_logger +from utils.time_helpers import WARSAW_TZ, UTC + +def local_midnight(d: date, tz: ZoneInfo) -> datetime: + """Return local midnight (tz-aware) for a given calendar date.""" + return datetime(d.year, d.month, d.day, 0, 0, tzinfo=tz) + +def end_of_business_day_utc(business_day: date, tz: ZoneInfo) -> datetime: + """ + End of the business day [start, end) expressed in UTC. + For day D this is local midnight of D+1 converted to UTC. + Correct across 23h/25h DST days. + """ + end_local = local_midnight(business_day + timedelta(days=1), tz=tz) + return end_local.astimezone(UTC) + +def is_day_fully_ingested(scraper, business_day: date, tz: ZoneInfo) -> bool: + """ + Decide if 'business_day' is fully present in DB based on scraper.last_entry(). + Assumes that a fully ingested day ends at local midnight of D+1 in the DB. 
+ """ + log = function_logger() + last: Optional[datetime] = scraper.last_entry() + if not last: + log.info("No last entry found for scrapper") + return False + + log.debug("Last entry: {}".format(last)) + # Normalize to UTC in case session TimeZone differs + assert last.tzinfo is not None, "last must be tz-aware" + + needed_end_utc = end_of_business_day_utc(business_day, tz) + return last >= needed_end_utc + +def maybe_ingest_for_next_day( + scraper, + now_local: datetime, + tz: ZoneInfo +) -> int: + """ + If the publication time for tomorrow has passed (with grace), and tomorrow is not in DB yet, + call ingest_day(tomorrow). Return the number of rows affected by the ingestion. + """ + log = function_logger() + log.debug("Start") + assert now_local.tzinfo is not None, "now_local must be tz-aware (Europe/Warsaw)." + + today = now_local.date() + # Publication happens today for the next business day + target_day = today + timedelta(days=1) + log.info("Target day = {}".format(target_day)) + + if is_day_fully_ingested(scraper, target_day, tz): + log.info("Publication already happend, we got the data already, skipping") + return 0 + + # Ingest the next business day + log.info("Target day is NOT fully ingested, trying to read {}".format(target_day)) + ingested_rows = scraper.ingest_day(target_day) + log.debug("Ingested rows = {}".format(ingested_rows)) + return ingested_rows + +def hourly_tick( + scraper +) -> int: + """ + Run this once per hour. + - Backfills if needed (once on startup or when the DB lags). + - Attempts ingestion for the next business day after each publication window. + Returns (affected_from_backfill, affected_from_next_day). 
+ """ + log = function_logger() + now_local = datetime.now(WARSAW_TZ) + + log.info(f"Hourly tick started for {now_local}") + if now_local.time().hour < 11: + log.info("Hourly tick skipped, too early for update") + return 0 + # 1) Try to ingest the next business day after each con figured window + affected_next = 0 + affected_next += maybe_ingest_for_next_day(scraper, now_local, WARSAW_TZ) + log.info(f"Hourly tick ended for {now_local} with {affected_next} affected rows") + return affected_next diff --git a/install.sh b/install.sh new file mode 100644 index 0000000..bc027bf --- /dev/null +++ b/install.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +set -euo pipefail + +# --- config --- +REPO_URL="${REPO_URL:-}" +APP_DIR="/opt/energy-price-scrapers" + +# --- clone or update repo --- +if [ ! -d "$APP_DIR/.git" ]; then + git clone "$REPO_URL" "$APP_DIR" +else + git -C "$APP_DIR" fetch --all --prune + git -C "$APP_DIR" checkout main + git -C "$APP_DIR" pull --ff-only +fi + +# --- venv build/refresh --- +python3 -m venv "$APP_DIR/.venv" +"$APP_DIR/.venv/bin/pip" install --upgrade pip setuptools wheel +if [ -f "$APP_DIR/requirements.txt" ]; then + "$APP_DIR/.venv/bin/pip" install -r "$APP_DIR/requirements.txt" +else + echo "requirements.txt missing; aborting." + exit 1 +fi + +chown -R energy:energy "$APP_DIR" +echo "Install complete." 
+ +sudo install -m 0755 $APP_DIR/os/energy-price-scrapers-update.sh /usr/local/bin/energy-price-scrapers-update +sudo install -m 0755 $APP_DIR/os/energy-price-scrapers.service /etc/systemd/system/energy-price-scrapers.service + +sudo systemctl daemon-reload +sudo systemctl enable --now energy-price-scrapers.service +sudo systemctl status energy-price-scrapers.service diff --git a/logging_setup.py b/logging_setup.py new file mode 100644 index 0000000..6bc9a81 --- /dev/null +++ b/logging_setup.py @@ -0,0 +1,52 @@ +import logging.config +import logging, os, sys, platform +try: + # Only available on Linux with systemd + from systemd.journal import JournalHandler # type: ignore + HAS_JOURNAL = True +except Exception: + HAS_JOURNAL = False + + +class MaxLevelFilter(logging.Filter): + """Allow records up to a certain level (inclusive).""" + def __init__(self, level): super().__init__(); self.level = level + def filter(self, record): return record.levelno <= self.level + + +class EnsureContext(logging.Filter): + """Always provide 'context' so the formatter never KeyErrors.""" + def filter(self, record: logging.LogRecord) -> bool: + if not hasattr(record, "context"): + record.context = "" + return True + +def setup_logging(level: int = logging.INFO, syslog_id: str = "energy-scrapers") -> None: + """Use journald if available; otherwise split stdout/stderr (Windows-friendly).""" + root = logging.getLogger() + root.handlers.clear() + root.setLevel(level) + + fmt = logging.Formatter("%(asctime)s %(levelname)s [%(name)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S") + + if HAS_JOURNAL and platform.system() == "Linux": + # Native journald handler (preserves levels/metadata) + h = JournalHandler(SYSLOG_IDENTIFIER=syslog_id) + h.setFormatter(logging.Formatter("%(message)s")) # journald adds timestamp/level + root.addHandler(h) + else: + # Portable fallback: INFO and below -> stdout, WARNING+ -> stderr + h_out = logging.StreamHandler(sys.stdout) + h_out.setLevel(logging.DEBUG) + 
h_out.addFilter(MaxLevelFilter(logging.INFO)) + h_out.setFormatter(fmt) + + h_err = logging.StreamHandler(sys.stderr) + h_err.setLevel(logging.WARNING) + h_err.setFormatter(fmt) + + root.addHandler(h_out) + root.addHandler(h_err) + + # Optional: make sure Python doesn’t buffer output (useful on Windows/services) + os.environ.setdefault("PYTHONUNBUFFERED", "1") \ No newline at end of file diff --git a/logging_utils.py b/logging_utils.py new file mode 100644 index 0000000..251c020 --- /dev/null +++ b/logging_utils.py @@ -0,0 +1,59 @@ +import logging +from contextlib import contextmanager +import inspect + +def _fmt_ctx(ctx: dict) -> str: + # Build a single bracketed context block for the formatter: "[k=v a=b] " + if not ctx: + return "" + kv = " ".join(f"{k}={v}" for k, v in ctx.items() if v is not None) + return f"[{kv}] " + + +class HasLogger: + """Mixin with explicit logger initialization. No __init__, no MRO dependency.""" + + def __init__(self): + self.log = None + self._log_ctx = None + self._base_logger = None + + def init_logger(self, *, context: dict | None = None, name: str | None = None) -> None: + """Initialize the logger explicitly; call from your class __init__.""" + base_name = name or self.__class__.__name__ + self._base_logger = logging.getLogger(base_name) + self._log_ctx: dict = dict(context or {}) + self.log = logging.LoggerAdapter(self._base_logger, {"context": _fmt_ctx(self._log_ctx)}) + + def set_logger_context(self, **context) -> None: + """Persistently merge new context into this instance and refresh adapter.""" + if not hasattr(self, "_base_logger"): # safety if init_logger was forgotten + self.init_logger() + self._log_ctx.update({k: v for k, v in context.items() if v is not None}) + self.log = logging.LoggerAdapter(self._base_logger, {"context": _fmt_ctx(self._log_ctx)}) + + def child_logger(self, **extra) -> logging.LoggerAdapter: + """Temporary adapter with additional context (does not mutate instance context).""" + if not hasattr(self, 
"_base_logger"): + self.init_logger() + merged = self._log_ctx.copy() + merged.update({k: v for k, v in extra.items() if v is not None}) + return logging.LoggerAdapter(self._base_logger, {"context": _fmt_ctx(merged)}) + + @contextmanager + def scoped_log(self, **extra): + """Context manager yielding a temporary adapter with extra context.""" + yield self.child_logger(**extra) + + +def function_logger(name: str | None = None, **context): + """Return a LoggerAdapter that follows the HasLogger format for free functions.""" + # Derive a readable default name: "." + if name is None: + frame = inspect.currentframe().f_back # caller + func = frame.f_code.co_name + mod = frame.f_globals.get("__name__", "app") + name = f"{mod}.{func}" + h = HasLogger() + h.init_logger(name=name, context=context) + return h.log \ No newline at end of file diff --git a/os/energy-price-scrapers-update.sh b/os/energy-price-scrapers-update.sh new file mode 100644 index 0000000..c0fb6a3 --- /dev/null +++ b/os/energy-price-scrapers-update.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +set -euo pipefail +APP_DIR="/opt/energy-price-scrapers" + +sudo -u energy git -C "$APP_DIR" fetch --all --prune +sudo -u energy git -C "$APP_DIR" checkout main +sudo -u energy git -C "$APP_DIR" pull --ff-only + +# upgrade deps if changed +sudo -u energy "$APP_DIR/.venv/bin/pip" install --upgrade pip +if [ -f "$APP_DIR/requirements.txt" ]; then + sudo -u energy "$APP_DIR/.venv/bin/pip" install -r "$APP_DIR/requirements.txt" +fi + +sudo systemctl restart energy-price-scrapers.service +echo "Updated & restarted." 
diff --git a/os/energy-price-scrapers.service b/os/energy-price-scrapers.service new file mode 100644 index 0000000..eec3a27 --- /dev/null +++ b/os/energy-price-scrapers.service @@ -0,0 +1,23 @@ +[Unit] +Description=Energy price scrapers (APScheduler) +After=network-online.target +Wants=network-online.target + +[Service] +Type=simple +User=energy +Group=energy +WorkingDirectory=/opt/energy-price-scrapers +EnvironmentFile=/etc/energy-price-scrapers.env +# Use the venv python to run your scheduler app +ExecStart=/opt/energy-price-scrapers/.venv/bin/python /opt/energy-price-scrapers/app.py +Restart=always +RestartSec=5 +# Hardening (tune as needed) +NoNewPrivileges=true +PrivateTmp=true +ProtectHome=true +ProtectSystem=full + +[Install] +WantedBy=multi-user.target diff --git a/plot_cost_breakdown.py b/plot_cost_breakdown.py new file mode 100644 index 0000000..1189ac1 --- /dev/null +++ b/plot_cost_breakdown.py @@ -0,0 +1,107 @@ +from __future__ import annotations +from zoneinfo import ZoneInfo +import pandas as pd +import matplotlib.pyplot as plt +from typing import List, Dict, Sequence, Optional +from datetime import datetime +import math + +WAW = ZoneInfo("Europe/Warsaw") + +def plot_stacked_with_negatives( + rows: List[Dict[str, float]], + *, + x_key: str = "ts", # klucz opisujący oś X (np. datetime lub str) + order: Optional[Sequence[str]] = None, # kolejność warstw; domyślnie heurystyka + title: Optional[str] = None, + ylabel: str = "PLN/kWh", + sum_label: str = "Cena brutto", + save_path: Optional[str] = None, +): + """ + Każdy element `rows` to słownik: + { x_key: , "Nazwa składnika 1": val1, "Nazwa składnika 2": val2, ... } + Funkcja NIC nie liczy „merytorycznie” – tylko rysuje to, co dostanie. + - Wart. dodatnie układa w górę, ujemne w dół (oddzielne stacki). + - Linia `sum_label` to suma wszystkich składników (bez x_key) dla każdego słupka. 
+ """ + if not rows: + raise ValueError("Brak danych: 'rows' jest puste.") + + # 1) etykiety osi X + labels = [] + for r in rows: + if x_key not in r: + raise KeyError(f"Brak klucza '{x_key}' w wierszu: {r}") + xv = r[x_key] + labels.append(xv.strftime("%Y-%m-%d\n%H:%M") if isinstance(xv, datetime) else str(xv)) + + # 2) lista kluczy (warstw) + all_keys = [] + for r in rows: + for k in r.keys(): + if k == x_key: + continue + if k not in all_keys: + all_keys.append(k) + + if order: + ordered = [k for k in order if k in all_keys] + [k for k in all_keys if k not in order] + else: + prefer = ["Dystrybucja (net)", "Energia (net)", "Podatki"] + preferred = [k for k in prefer if k in all_keys] + rest = sorted([k for k in all_keys if k not in preferred]) + ordered = preferred + rest + + # 3) macierze wartości + values_by_key = {k: [float(r.get(k, 0.0)) for r in rows] for k in ordered} + n = len(rows) + x = list(range(n)) + + # 4) rysuj – dwa stacki: dodatni (bottom_pos) i ujemny (bottom_neg) + fig, ax = plt.subplots(figsize=(max(8, n * 0.35), 5)) + bottom_pos = [0.0] * n + bottom_neg = [0.0] * n + legend_done = set() + + for k in ordered: + vals = values_by_key[k] + pos_vals = [v if v > 0 else 0.0 for v in vals] + neg_vals = [v if v < 0 else 0.0 for v in vals] + + # zdecyduj, gdzie nadać etykietę do legendy (tylko raz) + label_to_use = None + if k not in legend_done and (any(v != 0 for v in vals)): + label_to_use = k + legend_done.add(k) + + # dodatnie + if any(pos_vals): + ax.bar(x, pos_vals, bottom=bottom_pos, label=label_to_use) + bottom_pos = [b + v for b, v in zip(bottom_pos, pos_vals)] + label_to_use = None # etykieta już wykorzystana + + # ujemne (wysokość ujemna + dolna krawędź = kumulacja ujemnych) + if any(neg_vals): + ax.bar(x, neg_vals, bottom=bottom_neg, label=label_to_use) + bottom_neg = [b + v for b, v in zip(bottom_neg, neg_vals)] + label_to_use = None + + # 5) linia sumy (brutto): suma wszystkich składników w słupku + y_sum = [sum(values_by_key[k][i] for k 
in ordered) for i in range(n)] + ax.plot(x, y_sum, label=sum_label) + + # 6) oś X – pionowe etykiety i przerzedzenie + step = 1 if n <= 24 else math.ceil(n / 24) + ax.set_xticks(x[::step], [labels[i] for i in range(0, n, step)], rotation=90, ha="center") + + ax.set_ylabel(ylabel) + ax.set_xlabel("Okres") + if title: + ax.set_title(title) + ax.legend() + ax.grid(axis="y", linestyle="--", alpha=0.4) + fig.tight_layout() + if save_path: + fig.savefig(save_path, dpi=150) + return fig, ax \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..95c25ed --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +psycopg[binary]>=3.1,<3.3 +pandas~=2.3.2 +requests>=2.32,<3.0 +apscheduler>=3.10,<4.0 + +systemd-python>=235; sys_platform != "win32" \ No newline at end of file diff --git a/utils/calendar_pl.py b/utils/calendar_pl.py new file mode 100644 index 0000000..8585d6a --- /dev/null +++ b/utils/calendar_pl.py @@ -0,0 +1,44 @@ +from __future__ import annotations +from datetime import date, timedelta, datetime +from zoneinfo import ZoneInfo + +def gov_energy_prices_shield(ts: datetime): + WAW = ZoneInfo("Europe/Warsaw") + START = datetime(2024, 1, 1, 0, 0, tzinfo=WAW) + END = datetime(2025, 9, 30, 23, 59, 59, 999999, tzinfo=WAW) + + ts_local = ts.astimezone(WAW) if ts.tzinfo else ts.replace(tzinfo=WAW) + return START <= ts_local <= END + +def _easter_sunday(year: int) -> date: + # Meeus/Jones/Butcher + a = year % 19 + b = year // 100 + c = year % 100 + d = b // 4 + e = b % 4 + f = (b + 8) // 25 + g = (b - f + 1) // 3 + h = (19*a + b - d - g + 15) % 30 + i = c // 4 + k = c % 4 + l = (32 + 2*e + 2*i - h - k) % 7 + m = (a + 11*h + 22*l) // 451 + month = (h + l - 7*m + 114) // 31 + day = 1 + ((h + l - 7*m + 114) % 31) + return date(year, month, day) + +def polish_holidays(year: int) -> set[date]: + easter = _easter_sunday(year) + return { + date(year, 1, 1), date(year, 1, 6), + date(year, 5, 1), date(year, 5, 3), + date(year, 
8, 15), date(year, 11, 1), date(year, 11, 11), + date(year, 12, 25), date(year, 12, 26), + easter, easter + timedelta(days=1), + easter + timedelta(days=49), # Pentecost + easter + timedelta(days=60), # Corpus Christi + } + +def is_weekend_or_holiday(d: date) -> bool: + return d.weekday() >= 5 or d in polish_holidays(d.year) diff --git a/utils/time_helpers.py b/utils/time_helpers.py new file mode 100644 index 0000000..def46f5 --- /dev/null +++ b/utils/time_helpers.py @@ -0,0 +1,15 @@ +from __future__ import annotations +from datetime import time +from typing import Tuple +import zoneinfo + +WARSAW_TZ = zoneinfo.ZoneInfo("Europe/Warsaw") +UTC = zoneinfo.ZoneInfo("UTC") +TimeRange = Tuple[time, time] + +def in_range_local(t_local: time, rng: TimeRange) -> bool: + """[start, end) in local time, supports ranges crossing midnight.""" + start, end = rng + if start <= end: + return start <= t_local < end + return (t_local >= start) or (t_local < end)