This commit is contained in:
Bartosz Wieczorek 2025-09-02 18:14:05 +02:00
commit 166d64d51e
39 changed files with 1889 additions and 0 deletions

185
.gitignore vendored Normal file
View File

@ -0,0 +1,185 @@
# Created by .ignore support plugin (hsz.mobi)
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
### VirtualEnv template
# Virtualenv
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
[Bb]in
[Ii]nclude
[Ll]ib
[Ll]ib64
[Ll]ocal
[Ss]cripts
pyvenv.cfg
.venv
pip-selfcheck.json
### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# AWS User-specific
.idea/**/aws.xml
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# SonarLint plugin
.idea/sonarlint/
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
# idea folder, uncomment if you don't need it
# .idea

View File

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

8
.idea/modules.xml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/ranczo-energy-usage-scraper.iml" filepath="$PROJECT_DIR$/.idea/ranczo-energy-usage-scraper.iml" />
</modules>
</component>
</project>

View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

23
DistributionCost.py Normal file
View File

@ -0,0 +1,23 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime
import zoneinfo
from utils.time_helpers import WARSAW_TZ
@dataclass
class DistributionCostBase:
    """Base class for distribution-tariff cost providers (net PLN/kWh)."""

    tz: zoneinfo.ZoneInfo = WARSAW_TZ  # timezone in which tariff windows are defined

    def to_local_dt(self, ts: datetime) -> datetime:
        """Normalize *ts* to the tariff timezone; naive stamps are assumed local."""
        return ts.replace(tzinfo=self.tz) if ts.tzinfo is None else ts.astimezone(self.tz)

    def rate(self, ts: datetime) -> float:
        """Return PLN/kWh (netto) for given timestamp (override in subclasses)."""
        raise NotImplementedError

    def cost(self, ts: datetime, consumption_kwh: float) -> float:
        """Cost in PLN of *consumption_kwh* consumed at *ts*."""
        return self.rate(ts) * float(consumption_kwh)

View File

@ -0,0 +1,34 @@
from __future__ import annotations
import importlib
from typing import Any, Type, cast
from DistributionCost import DistributionCostBase
def create(name: str, /, **kwargs: Any) -> DistributionCostBase:
"""
Instantiate provider by name using a simple convention:
module: DistributionProvider.<Name>Provider
class: <Name>Provider
Example: create("TauronG13", rates=...)
-> DistributionCostProvider.TauronG13Provider.TauronG13Provider(...)
"""
safe = "".join(ch for ch in name if ch.isalnum() or ch == "_")
module_name = f"DistributionCostProvider.{safe}Provider"
class_name = f"{safe}Provider"
try:
mod = importlib.import_module(module_name)
except ModuleNotFoundError as e:
raise ValueError(f"Provider module '{module_name}' not found for name '{name}'.") from e
try:
cls: Type = getattr(mod, class_name)
except AttributeError as e:
raise ValueError(f"Provider class '{class_name}' not found in '{module_name}'.") from e
# be sure that this is a subclass of DistributionCostBase
if not issubclass(cls, DistributionCostBase):
raise TypeError(f"{class_name} does not derive from DistributionCostBase")
ProviderCls = cast(type[DistributionCostBase], cls)
return ProviderCls(**kwargs) # type: ignore[call-arg]

View File

@ -0,0 +1,14 @@
from __future__ import annotations
from datetime import datetime
from DistributionCost import DistributionCostBase
from logging_utils import HasLogger
# ---------- TAURON G11 ----------
class TauronG11Provider(DistributionCostBase, HasLogger):
    """TAURON G11 single-zone distribution tariff (net PLN/kWh)."""

    _FLAT_RATE = 0.504  # one zone only: PLN/kWh net, time-independent

    def __init__(self):
        self._init_logger()

    def rate(self, ts: datetime) -> float:
        """G11 has a single zone, so the rate ignores the timestamp."""
        return self._FLAT_RATE

View File

@ -0,0 +1,33 @@
from __future__ import annotations
from datetime import time, datetime
from DistributionCost import DistributionCostBase
from utils.time_helpers import in_range_local, TimeRange
# ---------- TAURON G12 ----------
class TauronG12Provider(DistributionCostBase):
    """
    TAURON G12 two-zone distribution tariff (net PLN/kWh):
    day ("high") 0.3310 /kWh, night ("low") 0.0994 /kWh.
    """
    low_1: TimeRange = (time(22, 0), time(6, 0))  # spans midnight
    low_2: TimeRange = (time(13, 0), time(15, 0))
    high_1: TimeRange = (time(6, 0), time(13, 0))
    high_2: TimeRange = (time(15, 0), time(22, 0))

    def __init__(self):
        self.rates = {
            "high": 0.3310,
            "low": 0.0994,
        }

    def rate(self, ts: datetime) -> float:
        """Return the zone price for *ts*, interpreted in local time."""
        local_time = self.to_local_dt(ts).time()
        in_low = in_range_local(local_time, self.low_1) or in_range_local(local_time, self.low_2)
        # everything outside the explicit low windows is billed at the day rate
        return self.rates["low"] if in_low else self.rates["high"]

View File

@ -0,0 +1,16 @@
from __future__ import annotations
from DistributionCostProvider.TauronG12Provider import TauronG12Provider
from utils.calendar_pl import is_weekend_or_holiday
# ---------- TAURON G12w ----------
class TauronG12WProvider(TauronG12Provider):
    """
    G12w: identical to G12 on working days; whole weekends and Polish
    public holidays are billed at the 'low' rate.
    rates={'low':..., 'high':...}
    """

    def rate(self, ts):
        local_dt = self.to_local_dt(ts)
        # day off -> flat low rate regardless of the hour
        if is_weekend_or_holiday(local_dt.date()):
            return self.rates["low"]
        # otherwise fall back to the regular G12 zones
        return super().rate(ts)

View File

@ -0,0 +1,68 @@
from __future__ import annotations
from datetime import time, date, datetime
from DistributionCost import DistributionCostBase
from utils.time_helpers import in_range_local, TimeRange
from utils.calendar_pl import is_weekend_or_holiday
# ---------- TAURON G13 ----------
class TauronG13Provider(DistributionCostBase):
    """
    TAURON G13 three-zone distribution tariff (net PLN/kWh):
      morning peak ("med")    0.2298 net (0.2826 gross)
      afternoon peak ("high") 0.3777 net (0.4645 gross)
      remaining hours ("low") 0.0741 net (0.0911 gross)
    Weekends and Polish public holidays are always 'low'.
    """
    winter_med: TimeRange = (time(7, 0), time(13, 0))
    winter_high: TimeRange = (time(16, 0), time(21, 0))
    winter_low_1: TimeRange = (time(13, 0), time(16, 0))
    winter_low_2: TimeRange = (time(21, 0), time(7, 0))  # spans midnight
    summer_med: TimeRange = (time(7, 0), time(13, 0))
    summer_high: TimeRange = (time(19, 0), time(22, 0))
    summer_low_1: TimeRange = (time(13, 0), time(19, 0))
    summer_low_2: TimeRange = (time(22, 0), time(7, 0))  # spans midnight

    def __init__(self):
        self.rates = {
            "high": 0.3777,  # ~0.465 gross
            "med": 0.2298,
            "low": 0.0741,
        }

    def _is_winter(self, d: date) -> bool:
        # winter season covers October through March
        return d.month in (10, 11, 12, 1, 2, 3)

    def rate(self, ts: datetime) -> float:
        """Zone price for *ts* in local time; weekend/holiday is always 'low'."""
        local_dt = self.to_local_dt(ts)
        day, tod = local_dt.date(), local_dt.time()
        if is_weekend_or_holiday(day):
            return self.rates["low"]
        if self._is_winter(day):
            high_window, med_window = self.winter_high, self.winter_med
        else:
            high_window, med_window = self.summer_high, self.summer_med
        if in_range_local(tod, high_window):
            return self.rates["high"]
        if in_range_local(tod, med_window):
            return self.rates["med"]
        # every remaining hour (the explicit low windows included) is 'low'
        return self.rates["low"]

View File

@ -0,0 +1,91 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import time, datetime
from typing import Dict, Literal
from DistributionCost import DistributionCostBase
from utils.time_helpers import in_range_local
from utils.calendar_pl import is_weekend_or_holiday
Season = Literal["summer", "winter"]
DayType = Literal["workday", "dayoff"]
Band = Literal["peak", "offpeak_day", "night"]
RatesDict = Dict[Season, Dict[DayType, Dict[Band, float]]]
@dataclass
class TauronG13SProvider(DistributionCostBase):
    """
    TAURON Dystrybucja G13s tariff (effective 1 July 2025), net PLN/kWh.

    Time bands (Europe/Warsaw):
      * night: 21:00-07:00 all year
      * summer (Apr-Sep): 07:00-09:00 peak, 09:00-17:00 off-peak, 17:00-21:00 peak
      * winter (Oct-Mar): 07:00-10:00 peak, 10:00-15:00 off-peak, 15:00-21:00 peak

    Prices also depend on the day type:
      - workday = Mon-Fri excluding public holidays,
      - dayoff  = Saturday, Sunday and statutory holidays.

    Net rates (PLN/kWh) may be injected via ``rates`` following the RatesDict
    structure; when omitted, the built-in default table is used.
    """
    rates: RatesDict = None  # populated in __init__

    # fixed time windows
    NIGHT: tuple[time, time] = (time(21, 0), time(7, 0))  # spans midnight
    SUMMER_PEAK_1: tuple[time, time] = (time(7, 0), time(9, 0))
    SUMMER_OFFDAY: tuple[time, time] = (time(9, 0), time(17, 0))
    SUMMER_PEAK_2: tuple[time, time] = (time(17, 0), time(21, 0))
    WINTER_PEAK_1: tuple[time, time] = (time(7, 0), time(10, 0))
    WINTER_OFFDAY: tuple[time, time] = (time(10, 0), time(15, 0))
    WINTER_PEAK_2: tuple[time, time] = (time(15, 0), time(21, 0))

    def __init__(self, rates: RatesDict | None = None):
        # Fix: the class docstring promises injectable rates, but the original
        # constructor unconditionally overwrote them with the defaults below.
        # Passing no argument keeps the original behavior.
        self.rates = rates if rates is not None else {
            "winter": {
                "dayoff": {"peak": 0.20, "offpeak_day": 0.12, "night": 0.11},
                "workday": {"peak": 0.24, "offpeak_day": 0.2, "night": 0.11},
            },
            "summer": {
                "dayoff": {"peak": 0.12, "offpeak_day": 0.04, "night": 0.11},
                "workday": {"peak": 0.29, "offpeak_day": 0.1, "night": 0.11},
            },
        }

    @staticmethod
    def _season(d: datetime.date) -> Season:
        """April-September is 'summer'; the rest is 'winter'."""
        return "summer" if 4 <= d.month <= 9 else "winter"

    @staticmethod
    def _daytype(d: datetime.date) -> DayType:
        return "dayoff" if is_weekend_or_holiday(d) else "workday"

    def _band(self, t: time, season: Season) -> Band:
        """Map a local time-of-day to its tariff band for the given season."""
        if in_range_local(t, self.NIGHT):
            return "night"
        if season == "summer":
            if in_range_local(t, self.SUMMER_PEAK_1) or in_range_local(t, self.SUMMER_PEAK_2):
                return "peak"
            if in_range_local(t, self.SUMMER_OFFDAY):
                return "offpeak_day"
        else:  # winter
            if in_range_local(t, self.WINTER_PEAK_1) or in_range_local(t, self.WINTER_PEAK_2):
                return "peak"
            if in_range_local(t, self.WINTER_OFFDAY):
                return "offpeak_day"
        # defensive fallback: windows above cover the full day, so this only
        # triggers if the class-level windows are modified inconsistently
        return "night"

    def rate(self, ts: datetime) -> float:
        """Net PLN/kWh for *ts*; raises KeyError when the rates table lacks the cell."""
        dt = self.to_local_dt(ts)
        season = self._season(dt.date())
        daytype = self._daytype(dt.date())
        band = self._band(dt.time(), season)
        try:
            return self.rates[season][daytype][band]
        except KeyError as e:
            raise KeyError(f"no price for [{season}][{daytype}][{band}]") from e

View File

@ -0,0 +1 @@
# empty (kept for package import); discovery is done by the factory via importlib

29
EnergyPrice.py Normal file
View File

@ -0,0 +1,29 @@
# EnergyPrice.py
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime
import zoneinfo
from utils.time_helpers import WARSAW_TZ
@dataclass
class EnergyPriceBase:
    """Base class for energy-price providers (net PLN/kWh)."""

    tz: zoneinfo.ZoneInfo = WARSAW_TZ  # timezone prices are quoted in

    def to_local_dt(self, ts: datetime) -> datetime:
        """Normalize *ts* to the provider timezone; naive stamps are assumed local."""
        return ts.replace(tzinfo=self.tz) if ts.tzinfo is None else ts.astimezone(self.tz)

    def has_rate(self, ts: datetime) -> bool:
        """True when rate() yields a price for *ts*; KeyError means 'no data'."""
        try:
            self.rate(ts)
        except KeyError:
            return False
        return True

    def rate(self, ts: datetime) -> float:
        """Return PLN/kWh (net) for given timestamp (override in subclasses)."""
        raise NotImplementedError

    def cost(self, ts: datetime, consumption_kwh: float) -> float:
        """Cost in PLN of *consumption_kwh* consumed at *ts*."""
        return self.rate(ts) * float(consumption_kwh)

32
EnergyPriceFactory.py Normal file
View File

@ -0,0 +1,32 @@
# EnergyPriceFactory.py
from __future__ import annotations
import importlib
from typing import Any, cast, Type
from EnergyPrice import EnergyPriceBase
def create(name: str, /, **kwargs: Any) -> EnergyPriceBase:
"""
Convention:
module: EnergyPriceProvider.<Name>Provider
class: <Name>Provider
Example: create("TauronG13", rates={...})
"""
safe = "".join(ch for ch in name if ch.isalnum() or ch == "_")
module_name = f"EnergyPriceProvider.{safe}Provider"
class_name = f"{safe}Provider"
try:
mod = importlib.import_module(module_name)
except ModuleNotFoundError as e:
raise ValueError(f"Provider module not found: {module_name}") from e
try:
cls = getattr(mod, class_name)
except AttributeError as e:
raise ValueError(f"Provider class not found: {class_name} in {module_name}") from e
if not issubclass(cls, EnergyPriceBase):
raise TypeError(f"{class_name} must inherit EnergyCostBase")
ProviderCls = cast(Type[EnergyPriceBase], cls)
return ProviderCls(**kwargs) # type: ignore[arg-type]

View File

@ -0,0 +1,113 @@
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Dict, List, Tuple, Optional
from zoneinfo import ZoneInfo
import psycopg
from EnergyPrice import EnergyPriceBase
WAW = ZoneInfo("Europe/Warsaw")
Interval = Tuple[datetime, datetime, float] # (ts_start, ts_end, price_pln_net)
@dataclass
class DynamicPricesProvider(EnergyPriceBase):
    """
    Base class for dynamic prices stored in pricing.energy_prices (Timescale).

    - rate(ts): returns the net price (PLN/kWh) for the given moment; the first
      call for a given day loads the WHOLE day into the cache.
    - has_rate(ts): inherited from EnergyPriceBase — returns False when rate()
      raises KeyError (override in a subclass for different semantics).
    - Subclasses define PROVIDER and KIND (e.g. 'TGE' / 'fixing_I').
    """
    dsn: Optional[str] = None  # e.g. "postgresql://user:pass@localhost:5432/postgres"
    conn: Optional[psycopg.Connection] = None  # or pass an existing psycopg3 connection
    table: str = "pricing.energy_prices"
    tz: ZoneInfo = WAW
    max_cached_days: int = 14  # how many distinct days to keep in the cache

    # identifiers — override in a subclass
    SIDE: str = ""  # 'buy' or 'sell'
    PROVIDER: str = ""
    KIND: str = ""

    # simple cache: key = start of the local day, value = list of (start, end, price)
    _cache: Dict[datetime, List[Interval]] = field(default_factory=dict, init=False, repr=False)
    _cache_order: List[datetime] = field(default_factory=list, init=False, repr=False)

    # ---------- public API ----------
    def provider(self) -> str:
        if not self.PROVIDER:
            raise NotImplementedError("Subclass must define PROVIDER")
        return self.PROVIDER

    def kind(self) -> str:
        if not self.KIND:
            raise NotImplementedError("Subclass must define KIND")
        return self.KIND

    def side(self) -> str:
        if not self.SIDE:
            raise NotImplementedError("Subclass must define SIDE")
        return self.SIDE

    def rate(self, ts: datetime) -> float:
        """Net price PLN/kWh at *ts*; loads the whole day into the cache on first use."""
        dt = self._to_local(ts)
        day_key = self._day_key(dt)
        self._ensure_day_cached(day_key)
        # intervals are half-open [start, end); linear scan over one day's rows
        for start, end, price in self._cache.get(day_key, []):
            if start <= dt < end:
                return price
        raise KeyError(f"No price for {dt.isoformat()} (provider={self.provider()}, kind={self.kind()})")

    def preload_day(self, day: datetime | None = None):
        """Optional prefetch of a day's prices (local start of day); defaults to today."""
        day_key = self._day_key(self._to_local(day or datetime.now(tz=self.tz)))
        self._ensure_day_cached(day_key)

    def clear_cache(self):
        self._cache.clear()
        self._cache_order.clear()

    # ---------- internals ----------
    def _to_local(self, ts: datetime) -> datetime:
        # naive timestamps are assumed to already be local time
        return ts.replace(tzinfo=self.tz) if ts.tzinfo is None else ts.astimezone(self.tz)

    def _day_key(self, ts_local: datetime) -> datetime:
        # cache key: local midnight of the timestamp's day
        return ts_local.replace(hour=0, minute=0, second=0, microsecond=0)

    def _ensure_conn(self) -> psycopg.Connection:
        # lazily open (and keep) a connection when only a DSN was supplied
        if self.conn is not None:
            return self.conn
        if not self.dsn:
            raise RuntimeError("Provide dsn= or conn= to DynamicPricesProvider")
        self.conn = psycopg.connect(self.dsn)
        return self.conn

    def _ensure_day_cached(self, day_start_local: datetime):
        if day_start_local in self._cache:
            return
        self._cache[day_start_local] = self._fetch_day(day_start_local)
        self._cache_order.append(day_start_local)
        # simple cache limit by number of days (FIFO eviction of the oldest day)
        while len(self._cache_order) > self.max_cached_days:
            oldest = self._cache_order.pop(0)
            self._cache.pop(oldest, None)

    def _fetch_day(self, day_start_local: datetime) -> List[Interval]:
        """Fetch from the DB all records overlapping [day_start, day_start + 1 day)."""
        day_end_local = day_start_local + timedelta(days=1)
        # range-overlap predicate so intervals crossing midnight are included
        sql = f"""
        SELECT ts_start, ts_end, price_pln_net
        FROM {self.table}
        WHERE provider = %s
        AND kind = %s
        AND side = %s
        AND tstzrange(ts_start, ts_end, '[)') && tstzrange(%s::timestamptz, %s::timestamptz, '[)')
        ORDER BY ts_start
        """
        with self._ensure_conn().cursor() as cur:
            cur.execute(sql, (self.provider(), self.kind(), self.side(), day_start_local, day_end_local))
            rows = cur.fetchall()
        # rows: List[Tuple[datetime, datetime, Decimal]]
        return [(r[0], r[1], float(r[2])) for r in rows]

View File

@ -0,0 +1,38 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Callable, Mapping, Optional
from EnergyPrice import EnergyPriceBase
PriceLookup = Callable[[datetime], Optional[float]]
@dataclass
class RDNProvider(EnergyPriceBase):
    """
    Skeleton for market-based energy cost (RDN).

    Choose ONE feeding strategy:
      1) price_lookup: callable(ts_local) -> PLN/kWh (net) or None
      2) mapping: dict/Mapping[datetime -> PLN/kWh] with tz-aware keys (Europe/Warsaw)

    Optional: ``period`` controls how mapping lookups are aligned (default: 1h).
    """
    price_lookup: PriceLookup | None = None
    mapping: Mapping[datetime, float] | None = None
    period: timedelta = timedelta(hours=1)

    def _align(self, ts_local: datetime) -> datetime:
        """Snap *ts_local* down to the start of its pricing period.

        Fix: the original always aligned to the top of the hour and ignored
        ``period`` despite the docstring; for the default 1h period the result
        is identical, and other periods now behave as documented.
        """
        day_start = ts_local.replace(hour=0, minute=0, second=0, microsecond=0)
        return day_start + ((ts_local - day_start) // self.period) * self.period

    def _get_price(self, ts_local: datetime) -> float:
        if self.price_lookup:
            val = self.price_lookup(ts_local)
            if val is None:
                raise KeyError(f"No RDN price for {ts_local.isoformat()}")
            return float(val)
        if self.mapping is not None:
            # align to the exact period start (e.g. hour start) before lookup
            aligned = self._align(ts_local)
            if aligned in self.mapping:
                return float(self.mapping[aligned])
            raise KeyError(f"No RDN price in mapping for {aligned.isoformat()}")
        raise RuntimeError("RDNProvider needs price_lookup or mapping")

    def rate(self, ts: datetime) -> float:
        """Net PLN/kWh at *ts* (normalized to the provider timezone)."""
        dt = self.to_local_dt(ts)
        return self._get_price(dt)

View File

@ -0,0 +1,13 @@
from __future__ import annotations
from EnergyPrice import EnergyPriceBase
from utils.calendar_pl import gov_energy_prices_shield
class TauronG11Provider(EnergyPriceBase):
    """TAURON G11 energy price (net PLN/kWh), government price-shield period only."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def rate(self, ts):
        # only the shielded price is known; anything else is an explicit TODO
        if not gov_energy_prices_shield(ts):
            raise RuntimeError("brak danych dla nie tarczy, trzeba ogarnąć")
        # 0.62 PLN/kWh gross cap -> net at 23% VAT
        return 0.62 / 1.23

View File

@ -0,0 +1,33 @@
from __future__ import annotations
from datetime import time, datetime
from EnergyPrice import EnergyPriceBase
from utils.time_helpers import in_range_local
from utils.calendar_pl import gov_energy_prices_shield
class TauronG12Provider(EnergyPriceBase):
    """TAURON G12 two-zone energy price (net PLN/kWh), price-shield period only."""

    low1 = (time(22, 0), time(6, 0))  # spans midnight
    low2 = (time(13, 0), time(15, 0))
    high1 = (time(6, 0), time(13, 0))
    high2 = (time(15, 0), time(22, 0))

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    @staticmethod
    def _rates(ts: datetime):
        """Zone prices valid at *ts*; only shield-period prices are known."""
        if not gov_energy_prices_shield(ts):
            raise RuntimeError("brak danych dla nie tarczy, trzeba ogarnąć")
        shield_rate = 0.62 / 1.23  # gross cap -> net at 23% VAT
        return {"low": 0.57 / 1.23, "high": shield_rate}

    def rate(self, ts):
        prices = self._rates(ts)
        local_time = self.to_local_dt(ts).time()
        in_low = in_range_local(local_time, self.low1) or in_range_local(local_time, self.low2)
        # hours outside the explicit low windows default to the high zone
        return prices["low"] if in_low else prices["high"]

View File

@ -0,0 +1,14 @@
from __future__ import annotations
from EnergyPriceProvider.TauronG12Provider import TauronG12Provider
from utils.calendar_pl import is_weekend_or_holiday
class TauronG12WProvider(TauronG12Provider):
    """G12w energy price: G12 zones on workdays; weekends/holidays use the low zone."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def rate(self, ts):
        local_dt = self.to_local_dt(ts)
        # day off -> flat low rate regardless of the hour
        if is_weekend_or_holiday(local_dt.date()):
            return self._rates(ts)["low"]
        return super().rate(ts)

View File

@ -0,0 +1,72 @@
from __future__ import annotations
from datetime import time, datetime
from EnergyPrice import EnergyPriceBase
from utils.time_helpers import in_range_local
from utils.calendar_pl import is_weekend_or_holiday, gov_energy_prices_shield
class TauronG13Provider(EnergyPriceBase):
    """
    Energy cost Tauron G13 (NET PLN/kWh).
    Winter (X-III):
        medium: 07-13, high: 16-21, low: 13-16 & 21-07; weekend/holidays = low
    Summer (IV-IX):
        medium: 07-13, high: 19-22, low: 13-19 & 22-07; weekend/holidays = low
    """
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.w_med = (time(7, 0), time(13, 0))
        self.w_high = (time(16, 0), time(21, 0))
        self.w_low1 = (time(13, 0), time(16, 0))
        self.w_low2 = (time(21, 0), time(7, 0))  # spans midnight
        self.s_med = (time(7, 0), time(13, 0))
        self.s_high = (time(19, 0), time(22, 0))
        self.s_low1 = (time(13, 0), time(19, 0))
        self.s_low2 = (time(22, 0), time(7, 0))  # spans midnight

    @staticmethod
    def _rates(ts: datetime):
        """Nested price table: season -> day type -> band (net PLN/kWh)."""
        if gov_energy_prices_shield(ts):
            tarcza_rates = 0.62 / 1.23  # shield gross cap -> net at 23% VAT
            return {
                "summer": {  # IV-IX
                    "workday": {"low": tarcza_rates, "medium": tarcza_rates, "high": tarcza_rates},
                    "dayoff": {"low": tarcza_rates, "medium": tarcza_rates, "high": tarcza_rates},
                },
                "winter": {  # X-III
                    "workday": {"low": tarcza_rates, "medium": tarcza_rates, "high": tarcza_rates},
                    "dayoff": {"low": tarcza_rates, "medium": tarcza_rates, "high": tarcza_rates},
                },
            }
        else:
            raise RuntimeError("brak danych dla nie tarczy, trzeba ogarnąć")

    @staticmethod
    def _is_winter(month: int) -> bool:
        return month in (10, 11, 12, 1, 2, 3)

    def rate(self, ts):
        """Net PLN/kWh at *ts*.

        BUG FIX: _rates() returns a nested dict keyed season -> daytype -> band,
        but the original indexed it with the band directly (rates["low"],
        rates[key]), which always raised KeyError. Index all three levels.
        """
        dt = self.to_local_dt(ts)
        d, t = dt.date(), dt.time()
        rates = self._rates(ts)
        season = "winter" if self._is_winter(d.month) else "summer"
        if is_weekend_or_holiday(d):
            return rates[season]["dayoff"]["low"]
        if season == "winter":
            if in_range_local(t, self.w_high):
                band = "high"
            elif in_range_local(t, self.w_med):
                band = "medium"
            else:
                # explicit low windows plus any remaining hour
                band = "low"
        else:
            if in_range_local(t, self.s_high):
                band = "high"
            elif in_range_local(t, self.s_med):
                band = "medium"
            else:
                band = "low"
        return rates[season]["workday"][band]

View File

@ -0,0 +1 @@
# (left intentionally empty; factory imports modules dynamically)

163
EnergyPriceScraper.py Normal file
View File

@ -0,0 +1,163 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, date, timedelta, timezone
from typing import Iterable, List, Tuple, Dict, Any, Optional
from zoneinfo import ZoneInfo
import json
import psycopg
from utils.time_helpers import WARSAW_TZ
# Row tuple consumed by UPSERT_SQL — see _rows_to_upsert for construction.
IntervalRow = Tuple[datetime, datetime, float, str, str, str, str, str, str]

# Idempotent upsert: insert a price interval, or on (ts_start, ts_end, provider,
# kind, side) conflict update it ONLY when something actually changed (price,
# parties, or the merged source_meta). RETURNING yields one row per touched
# record and whether it was an update (xmax <> 0) rather than a fresh insert.
UPSERT_SQL = """
INSERT INTO pricing.energy_prices
(ts_start, ts_end, price_pln_net, provider, kind, side, buyer, seller, source_meta)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)
ON CONFLICT (ts_start, ts_end, provider, kind, side)
DO UPDATE SET
price_pln_net = EXCLUDED.price_pln_net,
buyer = EXCLUDED.buyer,
seller = EXCLUDED.seller,
source_meta = COALESCE(pricing.energy_prices.source_meta, '{}'::jsonb)
|| COALESCE(EXCLUDED.source_meta, '{}'::jsonb),
inserted_at = now()
WHERE
pricing.energy_prices.price_pln_net IS DISTINCT FROM EXCLUDED.price_pln_net
OR pricing.energy_prices.buyer IS DISTINCT FROM EXCLUDED.buyer
OR pricing.energy_prices.seller IS DISTINCT FROM EXCLUDED.seller
OR (COALESCE(pricing.energy_prices.source_meta, '{}'::jsonb)
|| COALESCE(EXCLUDED.source_meta, '{}'::jsonb))
IS DISTINCT FROM COALESCE(pricing.energy_prices.source_meta, '{}'::jsonb)
RETURNING 1 AS affected, (xmax <> 0) AS was_update;"""

# Newest ts_end stored for one (provider, kind, side, buyer, seller) identity.
SELECT_LAST = """
select ts_end from pricing.energy_prices ep
where
ep.provider = %s
and ep.kind = %s
and ep.side = %s::price_side
and ep.buyer = %s
and ep.seller = %s
order by ts_start desc
limit 1
"""
@dataclass
class EnergyPriceScraperBase:
    """Base class for market scrapers (collect -> normalize -> UPSERT)."""
    dsn: Optional[str] = None
    conn: Optional[psycopg.Connection] = None
    tz: ZoneInfo = WARSAW_TZ
    period: timedelta = timedelta(hours=1)
    # identifiers — OVERRIDE in a subclass:
    PROVIDER: str = ""  # e.g. 'PSE' / 'instrat' / 'PSTRYK'
    KIND: str = ""      # e.g. 'rce' / 'fixing_I' / 'market_price'
    SIDE: str = ""      # 'buy'|'sell'
    BUYER: str = ""     # 'end_user'
    SELLER: str = ""    # 'market_index'
    log = None  # logger injected by subclass/runner; see _logger()
    # throttling/retry
    max_retries: int = 3
    backoff_sec: float = 1.0

    # ---------- identity accessors ----------
    def provider(self) -> str:
        if not self.PROVIDER:
            raise NotImplementedError("Subclass must define PROVIDER")
        return self.PROVIDER

    def kind(self) -> str:
        if not self.KIND:
            raise NotImplementedError("Subclass must define KIND")
        return self.KIND

    def side(self) -> str:
        if not self.SIDE:
            raise NotImplementedError("Subclass must define SIDE")
        return self.SIDE

    def buyer(self) -> str:
        if not self.BUYER:
            raise NotImplementedError("Subclass must define BUYER")
        return self.BUYER

    def seller(self) -> str:
        if not self.SELLER:
            raise NotImplementedError("Subclass must define SELLER")
        return self.SELLER

    def _logger(self):
        """Return the injected logger, creating a default one when missing.

        Fix: the original dereferenced self.log unconditionally and crashed
        with AttributeError whenever no logger was injected.
        """
        if self.log is None:
            import logging  # local import keeps the file's import block untouched
            self.log = logging.getLogger(type(self).__name__)
        return self.log

    # ---------- public API ----------
    def ingest_day(self, business_day: date) -> int:
        """Fetch and store the whole local day [00:00, 24:00). Returns upserted row count."""
        self._logger().info("Ingesting day %s", business_day)
        points = self.fetch_day(business_day, WARSAW_TZ)
        rows = self._rows_to_upsert(points)
        self._logger().debug("%s rows inserted", len(rows))
        return self._upsert(rows)

    def last_entry(self) -> Optional[datetime]:
        """Return ts_end of the newest stored row for this scraper's identity, or None."""
        self._logger().info("Retrieving last entry")
        with self._ensure_conn().cursor() as cur:
            params = (self.provider(), self.kind(), self.side(), self.buyer(), self.seller())
            cur.execute(SELECT_LAST, params)
            res = cur.fetchone()  # None when no matching rows exist
        if res is not None:
            self._logger().debug("last entry %s", res)
            return res[0]
        self._logger().debug("No last entry")
        return None

    def fetch_day(self, business_day: date, tz: timezone) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """Implement in a subclass. Return points priced in NET PLN/kWh."""
        raise NotImplementedError

    # ---------- helpers ----------
    def _ensure_conn(self) -> psycopg.Connection:
        # lazily open (and keep) a connection when only a DSN was supplied
        if self.conn:
            return self.conn
        if not self.dsn:
            raise RuntimeError("Podaj dsn= lub conn= dla PriceScraperBase")
        self.conn = psycopg.connect(self.dsn)
        return self.conn

    def _rows_to_upsert(self, points: Iterable[Tuple[datetime, datetime, float, Dict[str, Any]]]) -> List[IntervalRow]:
        """Normalize fetched points into UPSERT rows; timestamps must be tz-aware."""
        # Fix: Iterable has no len() — materialize so generators work and the
        # debug message below does not raise TypeError.
        point_list = list(points)
        self._logger().debug("Creating upsert rows from %s points", len(point_list))
        rows: List[IntervalRow] = []
        for ts_start, ts_end, price_pln_kwh_net, meta in point_list:
            # Fix: replaced asserts (stripped under -O) with an explicit check;
            # naive stamps would silently be stored in the server timezone.
            if ts_start.tzname() is None or ts_end.tzname() is None:
                raise ValueError("fetch_day must return timezone-aware timestamps")
            rows.append((
                ts_start, ts_end, float(price_pln_kwh_net),
                self.provider(), self.kind(), self.side(), self.buyer(), self.seller(),
                json.dumps(meta or {}),
            ))
        return rows

    def _upsert(self, rows: list[IntervalRow]) -> int:
        """Upsert *rows*; returns the number of rows actually inserted or updated."""
        if not rows:
            return 0
        affected = 0
        updated = 0
        self._logger().info("Upserting %d rows ", len(rows))
        with self._ensure_conn().cursor() as cur:
            for r in rows:
                cur.execute(UPSERT_SQL, r)
                res = cur.fetchone()  # None if no change, tuple if insert/update
                if res is not None:
                    affected += res[0]  # res[0] == 1
                    updated += int(bool(res[1]))  # was_update -> 1/0
        self._ensure_conn().commit()
        self._logger().info("Affected=%d (updated=%d, inserted=%d)", affected, updated, affected - updated)
        return affected

    def _day_range(self, d: date) -> Tuple[datetime, datetime]:
        """Local [start, end) datetimes covering calendar day *d*."""
        start = datetime(d.year, d.month, d.day, 0, 0, tzinfo=self.tz)
        return start, start + timedelta(days=1)

View File

@ -0,0 +1,48 @@
# EnergyPriceScraperFactory.py
from __future__ import annotations
import importlib
from typing import Any, cast, Type
from EnergyPriceScraper import EnergyPriceScraperBase
import os
import psycopg
# DB connection settings come from the standard PG* environment variables.
DB_HOST = os.getenv("PGHOST", "192.168.30.10")
DB_PORT = int(os.getenv("PGPORT", "5432"))
DB_NAME = os.getenv("PGDATABASE", "postgres")
DB_USER = os.getenv("PGUSER", "energy_ingest")
# SECURITY FIX: the password previously had a hard-coded default committed to
# the repository. That credential must be rotated; the password may now only
# come from the environment.
DB_PASS = os.getenv("PGPASSWORD")

def setup_db():
    """Open a new psycopg3 connection from the PG* environment settings.

    Raises:
        RuntimeError: when PGPASSWORD is not set in the environment.
    """
    if not DB_PASS:
        raise RuntimeError("PGPASSWORD environment variable is required")
    # psycopg 3
    conn = psycopg.connect(
        host=DB_HOST, port=DB_PORT, dbname=DB_NAME, user=DB_USER, password=DB_PASS
    )
    return conn
def create(name: str, /, **kwargs: Any) -> EnergyPriceScraperBase:
"""
Convention:
module: Scraper.<Name>Scraper
class: <Name>Provider
Example: create("TauronG13", rates={...})
"""
safe = "".join(ch for ch in name if ch.isalnum() or ch == "_")
module_name = f"Scraper.{safe}Scraper"
class_name = f"{safe}Scraper"
try:
mod = importlib.import_module(module_name)
except ModuleNotFoundError as e:
raise ValueError(f"Scraper module not found: {module_name}") from e
try:
cls = getattr(mod, class_name)
except AttributeError as e:
raise ValueError(f"Scraper class not found: {class_name} in {module_name}") from e
if not issubclass(cls, EnergyPriceScraperBase):
raise TypeError(f"{class_name} must inherit PriceScraperBase")
ProviderCls = cast(Type[EnergyPriceScraperBase], cls)
return ProviderCls(**kwargs) # type: ignore[arg-type]

19
README.md Normal file
View File

@ -0,0 +1,19 @@
sudo apt update
sudo apt install -y python3-venv python3-pip git tzdata
# system user without shell login
sudo useradd -r -s /usr/sbin/nologin -d /opt/energy-scrapers energy
Copy the systemd service file to:
/etc/systemd/system/energy-price-scrapers.service
sudo install -m 0755 os/energy-price-scrapers-update.sh /usr/local/bin/energy-scrapers-update
Then run, as root:
systemctl daemon-reload
systemctl enable --now energy-price-scrapers.service
systemctl status energy-price-scrapers.service
Logs:
journalctl -u energy-price-scrapers.service -f

View File

@ -0,0 +1,78 @@
from __future__ import annotations
from datetime import datetime, timedelta, date
from typing import List, Tuple, Dict, Any
import pandas as pd
from EnergyPriceScraper import EnergyPriceScraperBase
from utils.time_helpers import WARSAW_TZ, UTC
# CSV pobrane z https://energy.instrat.pl/ceny/energia-rdn-godzinowe/
class InstratRDN_CSVScraper(EnergyPriceScraperBase):
"""
Przykładowy scraper RDN z CSV/JSON (public HTTP).
Oczekuje CSV z kolumnami: 'date', 'fixing_i_price' (PLN/MWh) lub już PLN/kWh.
"""
url: str
    def __init__(self, path: str, **kwargs):
        """Configure scraper identity tags and eagerly parse the Instrat CSV at *path*."""
        super().__init__(**kwargs)
        self.PROVIDER = "instrat"
        self.KIND = "fixing_I"
        self.SIDE = "buy"
        self.BUYER = "end_user"  # the retailer settling with the prosumer
        self.SELLER = "market_index"
        # NOTE(review): load_instrat_csv populates self.out but has no return
        # statement, so self.data is None here — confirm whether self.data is
        # actually used anywhere, or make load_instrat_csv return the frame.
        self.data = self.load_instrat_csv(path)
def load_instrat_csv(self, path: str) -> pd.DataFrame:
"""
Wczytuje CSV Instrat z format
date,fixing_i_price,fixing_i_volume,fixing_ii_price,fixing_ii_volume
01.01.2016 00:00,108.27,2565.10,108.55,89.10
"""
# 1) Wczytanie z autodetekcją polskiego formatu
dateparse = lambda x: datetime.strptime(x, '%d.%m.%Y %H:%M').replace(tzinfo=WARSAW_TZ)
df = pd.read_csv(path, parse_dates=['date'], date_parser=dateparse)
fi_pln_kwh = (df["fixing_i_price"] / 1000.0).round(4)
fii_pln_kwh = (df["fixing_ii_price"] / 1000.0).round(4)
self.out = pd.DataFrame({
"fixing_i_pln_kwh": fi_pln_kwh.values,
"fixing_ii_pln_kwh": fii_pln_kwh.values,
"fixing_i_volume": pd.to_numeric(df.get("fixing_i_volume"), errors="coerce").values,
"fixing_ii_volume": pd.to_numeric(df.get("fixing_ii_volume"), errors="coerce").values,
}, index=df["date"]).sort_index()
# sanity check — nie wyszło pusto
if self.out[["fixing_i_pln_kwh", "fixing_ii_pln_kwh"]].notna().sum().sum() == 0:
raise RuntimeError("Brak cen po przeliczeniu — sprawdź separator/format liczb w CSV.")
def fetch_day(self, business_day: date) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
if not hasattr(self, "out"):
raise RuntimeError("Brak danych: najpierw wczytaj CSV i zbuduj self.out")
# wybór kolumny wg KIND (domyślnie Fixing I)
kind = getattr(self, "KIND", "fixing_I")
kind_norm = str(kind).replace(" ", "_").lower()
if "fixing_ii" in kind_norm:
col = "fixing_ii_pln_kwh"
fixing_tag = "II"
else:
col = "fixing_i_pln_kwh"
fixing_tag = "I"
day_start = datetime(business_day.year, business_day.month, business_day.day, 0, 0, tzinfo=self.tz)
day_end = day_start + timedelta(days=1)
df_day = self.out.loc[(self.out.index >= day_start) & (self.out.index < day_end)]
if col not in df_day.columns:
raise KeyError(f"Column '{col}' does not exists")
points: List[Tuple[datetime, datetime, float, Dict[str, Any]]] = []
for ts, price in df_day[col].dropna().items():
ts_end = ts + getattr(self, "period", timedelta(hours=1))
points.append((ts.to_pydatetime().astimezone(UTC), ts_end.to_pydatetime().astimezone(UTC), float(price),
{"source": "instrat_csv", "unit": "PLN/kWh", "fixing": fixing_tag, "taxes_included": False}))
return points

99
Scraper/PSE_RCEScraper.py Normal file
View File

@ -0,0 +1,99 @@
from __future__ import annotations
from datetime import datetime, timedelta, date, timezone
from typing import List, Tuple, Dict, Any
import requests
from EnergyPriceScraper import EnergyPriceScraperBase
from utils.time_helpers import UTC, WARSAW_TZ
from logging_utils import HasLogger
class PSE_RCEScraper(EnergyPriceScraperBase, HasLogger):
    """
    PSE RCE (PLN) prices for a given business day.

    Returns NET PLN/kWh (the API publishes RCE in PLN/MWh; we divide by 1000).
    """
    api_url: str = "https://api.raporty.pse.pl/api/rce-pln"
    session: requests.Session

    def __init__(self, **kwargs):
        """Configure provider metadata, logging and the HTTP session."""
        super().__init__(**kwargs)
        self.PROVIDER = "PSE"
        self.KIND = "rce"
        self.SIDE = "sell"
        # BUG FIX: was misspelled "reteiler". NOTE(review): if rows with the
        # old spelling already exist in the DB, migrate them before deploying.
        self.BUYER = "retailer"
        self.SELLER = "prosumer"
        self.init_logger()
        self.session = requests.Session()
        self.session.headers.update({"accept": "application/json"})
        self.log.info("Initializing PSE RCE Done")

    def fetch_range(self, start_date: datetime, end_date: datetime) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """
        Fetch RCE records for [start_date, end_date) — both tz-aware UTC
        datetimes exactly one day apart.

        Raises:
            ValueError: on invalid range/timezone arguments.
            requests.HTTPError: when the PSE API responds with an error status.
        """
        # Validate with explicit exceptions instead of assert (asserts are
        # stripped when Python runs with -O).
        if start_date.tzinfo is None or end_date.tzinfo is None:
            raise ValueError("start_date and end_date must be tz-aware")
        if start_date.tzinfo != UTC or end_date.tzinfo != UTC:
            raise ValueError("start_date and end_date must be in UTC")
        if not start_date < end_date:
            raise ValueError("start_date must be before end_date")
        if end_date - start_date != timedelta(days=1):
            # For now there is no way to import more than one day at a time.
            raise ValueError("only exactly one-day ranges are supported")
        self.log.info(f"Fetching range: [{start_date}, {end_date}) UTC / [{start_date.astimezone(WARSAW_TZ)}, {end_date.astimezone(WARSAW_TZ)}) Europe/Warsaw")
        business_day = start_date.astimezone(WARSAW_TZ).date()
        self.log.debug(f"business_day: {business_day}")
        # RCE v2: filter by business_date, select rce_pln,dtime,period
        params = {
            "$select": "rce_pln,publication_ts_utc,dtime_utc,business_date",
            "$filter": f"business_date eq '{business_day:%Y-%m-%d}'",
        }
        r = self.session.get(self.api_url, params=params, timeout=30)
        r.raise_for_status()
        data = r.json().get("value", [])
        self.log.debug(f"Fetched data len: {len(data)} points")
        return [self.parse_pse_rce_record(item) for item in data]

    def fetch_day(self, business_day: date, tz: timezone) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """Fetch one local business day by converting its local midnight bounds to UTC."""
        start = datetime(
            year=business_day.year,
            month=business_day.month,
            day=business_day.day,
            hour=0,
            minute=0,
            second=0,
            tzinfo=tz
        ).astimezone(UTC)
        end = start + timedelta(days=1)
        return self.fetch_range(start, end)

    @staticmethod
    def PSE_date_range(dtime: datetime):
        """Return the (start, end) pair of the 15-minute slot ending at *dtime*."""
        ts_end = dtime
        ts_start = dtime - timedelta(minutes=15)
        return ts_start, ts_end

    @staticmethod
    def parse_pse_rce_record(rec: dict):
        """Convert one RCE API record into (ts_start, ts_end, price_pln_kwh, meta)."""
        # The API 'dtime' marks the END of the slot, so the slot starts one
        # 15-minute period earlier.
        dtime_utc = datetime.strptime(rec["dtime_utc"], "%Y-%m-%d %H:%M:%S").replace(tzinfo=UTC)
        ts_start, ts_end = PSE_RCEScraper.PSE_date_range(dtime=dtime_utc)
        price_pln_mwh = float(rec["rce_pln"])
        price_pln_kwh = price_pln_mwh / 1000.0  # PLN/MWh -> PLN/kWh
        meta = {
            "business_date": rec["business_date"],
            "source": "PSE_RCE",
            "publication_ts_utc": rec["publication_ts_utc"],
            "unit": "PLN/kWh",
            "taxes_included": False
        }
        return ts_start, ts_end, price_pln_kwh, meta

121
Scraper/PstrykScraper.py Normal file
View File

@ -0,0 +1,121 @@
from __future__ import annotations
from datetime import datetime, timedelta, date, timezone
from typing import List, Tuple, Dict, Any, Optional
import os
import requests
from EnergyPriceScraper import EnergyPriceScraperBase
from logging_utils import HasLogger
from utils.time_helpers import UTC, WARSAW_TZ
class PstrykScraper(EnergyPriceScraperBase, HasLogger):
    """
    Prices published by the retailer Pstryk.

    Configuration comes from the environment:
        PSTRYK_API_BASE - API base URL (default: https://api.pstryk.pl/)
        PSTRYK_TOKEN    - API token (required; no default)

    Endpoint used:
        GET {base}/integrations/pricing?resolution=hour&window_start=...&window_end=...
        -> {'frames': [{'start': ..., 'end': ..., 'price_net': ..., ...}, ...]}
    """
    api_base: str
    token: str

    def __init__(self, **kwargs):
        """Configure provider metadata, logging and the authenticated HTTP session."""
        super().__init__(**kwargs)
        self.PROVIDER = "PSTRYK"
        self.KIND = "market_price"
        self.SIDE = "buy"
        self.BUYER = "end_user"
        self.SELLER = "PSTRYK"
        self.init_logger()
        self.api_base = os.getenv("PSTRYK_API_BASE", "https://api.pstryk.pl/").rstrip("/")
        # SECURITY FIX: the token must come from the environment. The previous
        # version shipped a hardcoded API key as the default, which leaked a
        # secret into version control and made the check below unreachable.
        self.token = os.getenv("PSTRYK_TOKEN", "")
        if not self.api_base or not self.token:
            raise RuntimeError("Ustaw PSTRYK_API_BASE i PSTRYK_TOKEN w środowisku.")
        self.session = requests.Session()
        self.session.headers.update({
            "accept": "application/json",
            # NOTE(review): the docstring mentions a Bearer token but the raw
            # token is sent here — confirm whether "Bearer <token>" is expected.
            "Authorization": f"{self.token}",
            "user-agent": "energy-scraper/1.0",
        })
        self.log.debug("Initializing PSTRYK Done")

    def fetch_range(self, start_date: datetime, end_date: datetime) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """
        Fetch hourly price frames for [start_date, end_date), both tz-aware UTC.

        Returns a list of (ts_start_utc, ts_end_utc, net_price_pln_kwh, meta).

        Raises:
            ValueError: on invalid range/timezone arguments.
            requests.HTTPError: when the API responds with an error status.
        """
        # Explicit validation instead of assert (asserts vanish under -O).
        if start_date.tzinfo is None or end_date.tzinfo is None:
            raise ValueError("start_date and end_date must be tz-aware")
        if start_date.tzinfo != UTC or end_date.tzinfo != UTC:
            raise ValueError("start_date and end_date must be in UTC")
        if not start_date < end_date:
            raise ValueError("start_date must be before end_date")
        url = f"{self.api_base}/integrations/pricing"
        self.log.debug(f"Fetching url: {url}")
        self.log.info(f"Fetching range: [{start_date}, {end_date}) UTC / [{start_date.astimezone(WARSAW_TZ)}, {end_date.astimezone(WARSAW_TZ)}) Europe/Warsaw")
        params = {
            "resolution": "hour",
            "window_start": start_date.strftime("%Y-%m-%dT%H:%M:%SZ"),
            "window_end": end_date.strftime("%Y-%m-%dT%H:%M:%SZ"),
        }
        r = self.session.get(url, params=params, timeout=30)
        r.raise_for_status()
        data = r.json()
        out: List[Tuple[datetime, datetime, float, Dict[str, Any]]] = []
        self.log.debug(f"Fetched {len(data['frames'])} data frames for [{start_date}, {end_date}) UTC")
        for frame in data['frames']:
            row = self.parse_generic_price_frame(frame)
            if row is not None:
                out.append(row)
        return out

    def fetch_day(self, business_day: date, tz: timezone) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """Fetch one local business day by converting its local midnight bounds to UTC."""
        start = datetime(
            year=business_day.year,
            month=business_day.month,
            day=business_day.day,
            hour=0,
            minute=0,
            second=0,
            tzinfo=tz
        ).astimezone(UTC)
        end = start + timedelta(days=1)
        return self.fetch_range(start, end)

    def parse_generic_price_frame(self, rec: dict):
        """
        Input (example):
            {'start':'2025-09-01T00:00:00+00:00','end':'2025-09-01T01:00:00+00:00',
             'price_net':0.37,'price_gross':0.65,'is_cheap':True,'is_expensive':False}
        Output:
            (ts_start_utc, ts_end_utc, price_pln_kwh_net, meta), or None for
            frames missing the cheap/expensive flags (treated as not published).
        """
        if rec.get("is_cheap") is None or rec.get("is_expensive") is None:
            self.log.info(f"Ignoring non-valid price frame {rec}")
            return None
        try:
            ts_start = datetime.fromisoformat(rec["start"]).astimezone(UTC)
            ts_end = datetime.fromisoformat(rec["end"]).astimezone(UTC)
        except Exception as e:
            raise ValueError(f"Bad iso timeformat in 'start'/'end': {e}") from e
        if ts_end <= ts_start:
            raise ValueError(f"Bad range: start={ts_start.isoformat()} end={ts_end.isoformat()}")
        try:
            price_pln_kwh_net = float(rec["price_net"])
        except Exception as e:
            raise ValueError(f"Price net not available 'price_net': {e}") from e
        meta = {
            "unit": "PLN/kWh",
            "taxes_included": False,
            "is_cheap": bool(rec.get("is_cheap")),
            "is_expensive": bool(rec.get("is_expensive"))
        }
        return ts_start, ts_end, price_pln_kwh_net, meta

1
Scraper/__init__.py Normal file
View File

@ -0,0 +1 @@
# (left intentionally empty; factory imports modules dynamically)

83
app.py Normal file
View File

@ -0,0 +1,83 @@
import os
import logging
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo
from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.triggers.cron import CronTrigger
import EnergyPriceScraperFactory
from logging_setup import setup_logging
from utils.time_helpers import WARSAW_TZ
from app_impl import hourly_tick
from logging_utils import function_logger
# Configure root logging before any scheduler/job code runs.
setup_logging(logging.DEBUG)
# One blocking scheduler; all cron triggers are evaluated in Warsaw local time.
sched = BlockingScheduler(timezone=WARSAW_TZ)
def run_pstryk():
    """Scheduled job: build a Pstryk scraper and run the hourly ingestion tick."""
    log = function_logger()
    log.info("Running PSTRYK")
    try:
        db_conn = EnergyPriceScraperFactory.setup_db()
        scraper = EnergyPriceScraperFactory.create("Pstryk", conn=db_conn)
        # 11:30 and hourly safety check
        hourly_tick(scraper)
    except Exception as e:
        log.error(f"PSTRYK throw an exception: {e}")
def run_PSE_RCE():
    """Scheduled job: build a PSE RCE scraper and run the hourly ingestion tick."""
    log = function_logger()
    log.info("Running PSE-RCE")
    try:
        db_conn = EnergyPriceScraperFactory.setup_db()
        rce_scraper = EnergyPriceScraperFactory.create("PSE_RCE", conn=db_conn)
        hourly_tick(rce_scraper)
    except Exception as e:
        log.error(f"PSE-RCE throw an exception: {e}")
# Safety-net schedule: both jobs run EVERY HOUR at minute 30 (hour='*' is
# hourly, not daily as previously claimed); hourly_tick() itself skips runs
# before 11:00 local time, so effective ingestion starts at 11:30.
sched.add_job(run_pstryk, 'cron', hour='*', minute='30')
sched.add_job(run_PSE_RCE, 'cron', hour='*', minute='30')
sched.start()
# if __name__ == "__main__":
# setup_logging(logging.DEBUG)
#
# conn = EnergyPriceScraperFactory.setup_db()
# # scraper = EnergyPriceScraperFactory.create("InstratRDN_CSV", conn=conn, path=path)
# # scraper = EnergyPriceScraperFactory.create("PSE_RCE", conn=conn)
# scraper = EnergyPriceScraperFactory.create("Pstryk", conn=conn)
# distribution_cost = DistributionCostFactory.create("TauronG13")
# energy_price = DynamicPricesProvider.DynamicPricesProvider(PROVIDER="instrat", conn=conn, KIND="fixing I", SIDE="buy")
#
# start = datetime(2025, 6, 27, 0, 0) # lokalny czas PL (jeśli naive funkcja sama doda WAW)
#
# rows = []
# for h in range(24*3):
# ts = start + timedelta(hours=h)
# dist = distribution_cost.rate(ts)
# price = energy_price.rate(ts)
# pstryk_fee = 0.08
#
# rows.append({
# "ts": ts,
# "Dystrybucja (net)": dist,
# "Energia (net)": price,
# "Pstryk (net)" : pstryk_fee,
# "Podatki": (distribution_cost.rate(ts) + price + pstryk_fee) * 1.23 - (distribution_cost.rate(ts) + price + pstryk_fee),
# })
#
# fig, ax = plot_stacked_with_negatives(
# rows,
# title="Składowe ceny 1 kWh",
# order=["Dystrybucja (net)", "Energia (net)", "Podatki"] # opcjonalnie
# )
#
# plt.show()

88
app_impl.py Normal file
View File

@ -0,0 +1,88 @@
from __future__ import annotations
from datetime import datetime, date, time, timedelta, timezone
from zoneinfo import ZoneInfo
from typing import Sequence, Tuple, Optional
from logging_utils import function_logger
from utils.time_helpers import WARSAW_TZ, UTC
def local_midnight(d: date, tz: ZoneInfo) -> datetime:
    """Return the tz-aware midnight that starts calendar date *d* in *tz*."""
    return datetime.combine(d, time.min, tzinfo=tz)
def end_of_business_day_utc(business_day: date, tz: ZoneInfo) -> datetime:
    """
    End of the business day [start, end) expressed in UTC.

    For day D this is the local midnight of D+1 converted to UTC, which stays
    correct across 23h/25h DST transition days.
    """
    next_day = business_day + timedelta(days=1)
    return local_midnight(next_day, tz=tz).astimezone(UTC)
def is_day_fully_ingested(scraper, business_day: date, tz: ZoneInfo) -> bool:
    """
    Decide if 'business_day' is fully present in DB based on scraper.last_entry().
    Assumes that a fully ingested day ends at local midnight of D+1 in the DB.
    """
    log = function_logger()
    newest: Optional[datetime] = scraper.last_entry()
    if not newest:
        log.info("No last entry found for scrapper")
        return False
    log.debug("Last entry: {}".format(newest))
    # Normalize to UTC in case session TimeZone differs
    assert newest.tzinfo is not None, "last must be tz-aware"
    required_end = end_of_business_day_utc(business_day, tz)
    return newest >= required_end
def maybe_ingest_for_next_day(
    scraper,
    now_local: datetime,
    tz: ZoneInfo
) -> int:
    """
    If the publication time for tomorrow has passed (with grace), and tomorrow is not in DB yet,
    call ingest_day(tomorrow). Return the number of rows affected by the ingestion.
    """
    log = function_logger()
    log.debug("Start")
    assert now_local.tzinfo is not None, "now_local must be tz-aware (Europe/Warsaw)."
    # Publication happens today for the next business day.
    target_day = now_local.date() + timedelta(days=1)
    log.info("Target day = {}".format(target_day))
    if is_day_fully_ingested(scraper, target_day, tz):
        log.info("Publication already happend, we got the data already, skipping")
        return 0
    # Tomorrow is missing (or partial) — attempt the ingestion now.
    log.info("Target day is NOT fully ingested, trying to read {}".format(target_day))
    row_count = scraper.ingest_day(target_day)
    log.debug("Ingested rows = {}".format(row_count))
    return row_count
def hourly_tick(
    scraper
) -> int:
    """
    Run this once per hour.
    - Skips entirely before 11:00 Warsaw local time (publication window).
    - Attempts ingestion for the next business day after each publication window.
    Returns the number of rows affected by the ingestion (0 when skipped).
    """
    log = function_logger()
    now_local = datetime.now(WARSAW_TZ)
    log.info(f"Hourly tick started for {now_local}")
    if now_local.time().hour < 11:
        log.info("Hourly tick skipped, too early for update")
        return 0
    # 1) Try to ingest the next business day after each configured window
    affected_next = 0
    affected_next += maybe_ingest_for_next_day(scraper, now_local, WARSAW_TZ)
    log.info(f"Hourly tick ended for {now_local} with {affected_next} affected rows")
    return affected_next

35
install.sh Normal file
View File

@ -0,0 +1,35 @@
#!/usr/bin/env bash
# Install the energy-price-scrapers app: clone/update the repo, build the
# virtualenv, install the update helper + systemd unit, and start the service.
set -euo pipefail

# --- config ---
REPO_URL="${REPO_URL:-<PUT_YOUR_GIT_URL_HERE>}"
APP_DIR="/opt/energy-price-scrapers"

# --- clone or update repo ---
if [ ! -d "$APP_DIR/.git" ]; then
    git clone "$REPO_URL" "$APP_DIR"
else
    git -C "$APP_DIR" fetch --all --prune
    git -C "$APP_DIR" checkout main
    git -C "$APP_DIR" pull --ff-only
fi

# --- venv build/refresh ---
python3 -m venv "$APP_DIR/.venv"
"$APP_DIR/.venv/bin/pip" install --upgrade pip setuptools wheel
if [ -f "$APP_DIR/requirements.txt" ]; then
    "$APP_DIR/.venv/bin/pip" install -r "$APP_DIR/requirements.txt"
else
    echo "requirements.txt missing; aborting."
    exit 1
fi
chown -R energy:energy "$APP_DIR"

# --- install helper script + systemd unit, then start the service ---
# Quote "$APP_DIR" so the script survives paths with spaces/globs.
sudo install -m 0755 "$APP_DIR/os/energy-price-scrapers-update.sh" /usr/local/bin/energy-price-scrapers-update
# NOTE(review): 0644 is the usual mode for unit files; 0755 kept from original.
sudo install -m 0755 "$APP_DIR/os/energy-price-scrapers.service" /etc/systemd/system/energy-price-scrapers.service
sudo systemctl daemon-reload
sudo systemctl enable --now energy-price-scrapers.service
sudo systemctl status energy-price-scrapers.service
# Moved to the end: previously this printed before the service was installed.
echo "Install complete."

52
logging_setup.py Normal file
View File

@ -0,0 +1,52 @@
import logging.config
import logging, os, sys, platform
# systemd journal support is optional: the handler import succeeds only on
# Linux hosts with the systemd-python package installed (see requirements.txt).
try:
    # Only available on Linux with systemd
    from systemd.journal import JournalHandler  # type: ignore
    HAS_JOURNAL = True
except Exception:
    HAS_JOURNAL = False
class MaxLevelFilter(logging.Filter):
    """Allow records up to a certain level (inclusive)."""

    def __init__(self, level):
        """Store *level* as the inclusive upper bound."""
        super().__init__()
        self.level = level

    def filter(self, record):
        """Accept the record only when its level does not exceed the bound."""
        return record.levelno <= self.level
class EnsureContext(logging.Filter):
    """Always provide 'context' so the formatter never KeyErrors."""

    def filter(self, record: logging.LogRecord) -> bool:
        """Guarantee a 'context' attribute (default "") and pass the record on."""
        record.context = getattr(record, "context", "")
        return True
def setup_logging(level: int = logging.INFO, syslog_id: str = "energy-scrapers") -> None:
    """Use journald if available; otherwise split stdout/stderr (Windows-friendly)."""
    root = logging.getLogger()
    root.handlers.clear()
    root.setLevel(level)
    formatter = logging.Formatter("%(asctime)s %(levelname)s [%(name)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
    use_journal = HAS_JOURNAL and platform.system() == "Linux"
    if use_journal:
        # Native journald handler (preserves levels/metadata); journald adds
        # its own timestamp and level, so only the message is formatted.
        journal_handler = JournalHandler(SYSLOG_IDENTIFIER=syslog_id)
        journal_handler.setFormatter(logging.Formatter("%(message)s"))
        root.addHandler(journal_handler)
    else:
        # Portable fallback: INFO and below -> stdout, WARNING+ -> stderr.
        stdout_handler = logging.StreamHandler(sys.stdout)
        stdout_handler.setLevel(logging.DEBUG)
        stdout_handler.addFilter(MaxLevelFilter(logging.INFO))
        stdout_handler.setFormatter(formatter)
        stderr_handler = logging.StreamHandler(sys.stderr)
        stderr_handler.setLevel(logging.WARNING)
        stderr_handler.setFormatter(formatter)
        root.addHandler(stdout_handler)
        root.addHandler(stderr_handler)
    # Hint against output buffering (affects subprocesses/re-execs, useful for services).
    os.environ.setdefault("PYTHONUNBUFFERED", "1")

59
logging_utils.py Normal file
View File

@ -0,0 +1,59 @@
import logging
from contextlib import contextmanager
import inspect
def _fmt_ctx(ctx: dict) -> str:
# Build a single bracketed context block for the formatter: "[k=v a=b] "
if not ctx:
return ""
kv = " ".join(f"{k}={v}" for k, v in ctx.items() if v is not None)
return f"[{kv}] "
class HasLogger:
"""Mixin with explicit logger initialization. No __init__, no MRO dependency."""
def __init__(self):
self.log = None
self._log_ctx = None
self._base_logger = None
def init_logger(self, *, context: dict | None = None, name: str | None = None) -> None:
"""Initialize the logger explicitly; call from your class __init__."""
base_name = name or self.__class__.__name__
self._base_logger = logging.getLogger(base_name)
self._log_ctx: dict = dict(context or {})
self.log = logging.LoggerAdapter(self._base_logger, {"context": _fmt_ctx(self._log_ctx)})
def set_logger_context(self, **context) -> None:
"""Persistently merge new context into this instance and refresh adapter."""
if not hasattr(self, "_base_logger"): # safety if init_logger was forgotten
self.init_logger()
self._log_ctx.update({k: v for k, v in context.items() if v is not None})
self.log = logging.LoggerAdapter(self._base_logger, {"context": _fmt_ctx(self._log_ctx)})
def child_logger(self, **extra) -> logging.LoggerAdapter:
"""Temporary adapter with additional context (does not mutate instance context)."""
if not hasattr(self, "_base_logger"):
self.init_logger()
merged = self._log_ctx.copy()
merged.update({k: v for k, v in extra.items() if v is not None})
return logging.LoggerAdapter(self._base_logger, {"context": _fmt_ctx(merged)})
@contextmanager
def scoped_log(self, **extra):
"""Context manager yielding a temporary adapter with extra context."""
yield self.child_logger(**extra)
def function_logger(name: str | None = None, **context):
    """Return a LoggerAdapter that follows the HasLogger format for free functions."""
    if name is None:
        # Derive a readable default name "<module>.<function>" from the caller.
        caller = inspect.currentframe().f_back
        func_name = caller.f_code.co_name
        module_name = caller.f_globals.get("__name__", "app")
        name = f"{module_name}.{func_name}"
    holder = HasLogger()
    holder.init_logger(name=name, context=context)
    return holder.log

View File

@ -0,0 +1,16 @@
#!/usr/bin/env bash
# Update helper: pull the latest main branch into the app checkout,
# refresh Python dependencies, and restart the systemd service.
set -euo pipefail
APP_DIR="/opt/energy-price-scrapers"
# Run git as the unprivileged 'energy' service user that owns the checkout.
sudo -u energy git -C "$APP_DIR" fetch --all --prune
sudo -u energy git -C "$APP_DIR" checkout main
sudo -u energy git -C "$APP_DIR" pull --ff-only
# upgrade deps if changed
sudo -u energy "$APP_DIR/.venv/bin/pip" install --upgrade pip
if [ -f "$APP_DIR/requirements.txt" ]; then
    sudo -u energy "$APP_DIR/.venv/bin/pip" install -r "$APP_DIR/requirements.txt"
fi
# Restart so the already-running scheduler picks up the new code.
sudo systemctl restart energy-price-scrapers.service
echo "Updated & restarted."

View File

@ -0,0 +1,23 @@
[Unit]
Description=Energy price scrapers (APScheduler)
# Wait for actual network connectivity — the scrapers call external APIs.
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
User=energy
Group=energy
WorkingDirectory=/opt/energy-price-scrapers
# NOTE(review): without a leading '-' (EnvironmentFile=-/...), the unit fails
# to start when this file is missing — confirm the file is always deployed.
EnvironmentFile=/etc/energy-price-scrapers.env
# Use the venv python to run your scheduler app
ExecStart=/opt/energy-price-scrapers/.venv/bin/python /opt/energy-price-scrapers/app.py
Restart=always
RestartSec=5
# Hardening (tune as needed)
NoNewPrivileges=true
PrivateTmp=true
ProtectHome=true
ProtectSystem=full
[Install]
WantedBy=multi-user.target

107
plot_cost_breakdown.py Normal file
View File

@ -0,0 +1,107 @@
from __future__ import annotations
from zoneinfo import ZoneInfo
import pandas as pd
import matplotlib.pyplot as plt
from typing import List, Dict, Sequence, Optional
from datetime import datetime
import math
WAW = ZoneInfo("Europe/Warsaw")
def plot_stacked_with_negatives(
    rows: List[Dict[str, float]],
    *,
    x_key: str = "ts",  # key naming the X-axis field (e.g. datetime or str)
    order: Optional[Sequence[str]] = None,  # layer order; defaults to a heuristic
    title: Optional[str] = None,
    ylabel: str = "PLN/kWh",
    sum_label: str = "Cena brutto",
    save_path: Optional[str] = None,
):
    """
    Each element of `rows` is a dict:
        { x_key: <datetime|str>, "component name 1": val1, "component name 2": val2, ... }
    The function computes no domain values — it only draws what it is given.
    - Positive values are stacked upwards, negatives downwards (separate stacks).
    - The `sum_label` line is the sum of all components (excluding x_key) per bar.
    Returns the (fig, ax) pair; optionally saves the figure to `save_path`.
    """
    if not rows:
        raise ValueError("Brak danych: 'rows' jest puste.")
    # 1) X-axis labels (datetimes formatted, everything else stringified)
    labels = []
    for r in rows:
        if x_key not in r:
            raise KeyError(f"Brak klucza '{x_key}' w wierszu: {r}")
        xv = r[x_key]
        labels.append(xv.strftime("%Y-%m-%d\n%H:%M") if isinstance(xv, datetime) else str(xv))
    # 2) list of layer keys, in first-seen order across all rows
    all_keys = []
    for r in rows:
        for k in r.keys():
            if k == x_key:
                continue
            if k not in all_keys:
                all_keys.append(k)
    if order:
        ordered = [k for k in order if k in all_keys] + [k for k in all_keys if k not in order]
    else:
        # Heuristic default order: known component names first, rest sorted.
        prefer = ["Dystrybucja (net)", "Energia (net)", "Podatki"]
        preferred = [k for k in prefer if k in all_keys]
        rest = sorted([k for k in all_keys if k not in preferred])
        ordered = preferred + rest
    # 3) value matrix: per layer, one value per row (missing -> 0.0)
    values_by_key = {k: [float(r.get(k, 0.0)) for r in rows] for k in ordered}
    n = len(rows)
    x = list(range(n))
    # 4) draw two stacks: positive (bottom_pos) and negative (bottom_neg)
    fig, ax = plt.subplots(figsize=(max(8, n * 0.35), 5))
    bottom_pos = [0.0] * n
    bottom_neg = [0.0] * n
    legend_done = set()
    for k in ordered:
        vals = values_by_key[k]
        pos_vals = [v if v > 0 else 0.0 for v in vals]
        neg_vals = [v if v < 0 else 0.0 for v in vals]
        # decide where to attach the legend label (only once per layer)
        label_to_use = None
        if k not in legend_done and (any(v != 0 for v in vals)):
            label_to_use = k
            legend_done.add(k)
        # positive part
        if any(pos_vals):
            ax.bar(x, pos_vals, bottom=bottom_pos, label=label_to_use)
            bottom_pos = [b + v for b, v in zip(bottom_pos, pos_vals)]
            label_to_use = None  # label already consumed
        # negative part (negative height + bottom edge = cumulated negatives)
        if any(neg_vals):
            ax.bar(x, neg_vals, bottom=bottom_neg, label=label_to_use)
            bottom_neg = [b + v for b, v in zip(bottom_neg, neg_vals)]
            label_to_use = None
    # 5) total (gross) line: sum of all components in each bar
    y_sum = [sum(values_by_key[k][i] for k in ordered) for i in range(n)]
    ax.plot(x, y_sum, label=sum_label)
    # 6) X axis: vertical labels, thinned out to at most ~24 ticks
    step = 1 if n <= 24 else math.ceil(n / 24)
    ax.set_xticks(x[::step], [labels[i] for i in range(0, n, step)], rotation=90, ha="center")
    ax.set_ylabel(ylabel)
    ax.set_xlabel("Okres")
    if title:
        ax.set_title(title)
    ax.legend()
    ax.grid(axis="y", linestyle="--", alpha=0.4)
    fig.tight_layout()
    if save_path:
        fig.savefig(save_path, dpi=150)
    return fig, ax

6
requirements.txt Normal file
View File

@ -0,0 +1,6 @@
psycopg[binary]>=3.1,<3.3
pandas~=2.3.2
requests>=2.32,<3.0
apscheduler>=3.10,<4.0
systemd-python>=235; sys_platform != "win32"

44
utils/calendar_pl.py Normal file
View File

@ -0,0 +1,44 @@
from __future__ import annotations
from datetime import date, timedelta, datetime
from zoneinfo import ZoneInfo
def gov_energy_prices_shield(ts: datetime):
    """Return True if *ts* falls inside the Polish government energy-price
    shield window (2024-01-01 .. 2025-09-30, Europe/Warsaw local time).

    Naive datetimes are treated as already being Warsaw local time.
    """
    warsaw = ZoneInfo("Europe/Warsaw")
    window_start = datetime(2024, 1, 1, 0, 0, tzinfo=warsaw)
    window_end = datetime(2025, 9, 30, 23, 59, 59, 999999, tzinfo=warsaw)
    local_ts = ts.astimezone(warsaw) if ts.tzinfo else ts.replace(tzinfo=warsaw)
    return window_start <= local_ts <= window_end
def _easter_sunday(year: int) -> date:
# Meeus/Jones/Butcher
a = year % 19
b = year // 100
c = year % 100
d = b // 4
e = b % 4
f = (b + 8) // 25
g = (b - f + 1) // 3
h = (19*a + b - d - g + 15) % 30
i = c // 4
k = c % 4
l = (32 + 2*e + 2*i - h - k) % 7
m = (a + 11*h + 22*l) // 451
month = (h + l - 7*m + 114) // 31
day = 1 + ((h + l - 7*m + 114) % 31)
return date(year, month, day)
def polish_holidays(year: int) -> set[date]:
    """All Polish public holidays for *year*: fixed dates plus Easter-movable feasts."""
    easter = _easter_sunday(year)
    fixed = {
        date(year, 1, 1), date(year, 1, 6),
        date(year, 5, 1), date(year, 5, 3),
        date(year, 8, 15), date(year, 11, 1), date(year, 11, 11),
        date(year, 12, 25), date(year, 12, 26),
    }
    movable = {
        easter,
        easter + timedelta(days=1),   # Easter Monday
        easter + timedelta(days=49),  # Pentecost
        easter + timedelta(days=60),  # Corpus Christi
    }
    return fixed | movable
def is_weekend_or_holiday(d: date) -> bool:
    """True for Saturday/Sunday or any Polish public holiday."""
    if d.weekday() >= 5:
        return True
    return d in polish_holidays(d.year)

15
utils/time_helpers.py Normal file
View File

@ -0,0 +1,15 @@
from __future__ import annotations
from datetime import time
from typing import Tuple
import zoneinfo
WARSAW_TZ = zoneinfo.ZoneInfo("Europe/Warsaw")
UTC = zoneinfo.ZoneInfo("UTC")
TimeRange = Tuple[time, time]
def in_range_local(t_local: time, rng: TimeRange) -> bool:
    """[start, end) in local time, supports ranges crossing midnight."""
    start, end = rng
    if start > end:  # the range wraps past midnight
        return t_local >= start or t_local < end
    return start <= t_local < end