init
This commit is contained in:
commit
166d64d51e
185
.gitignore
vendored
Normal file
185
.gitignore
vendored
Normal file
@ -0,0 +1,185 @@
|
||||
# Created by .ignore support plugin (hsz.mobi)
|
||||
### Python template
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
env/
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*,cover
|
||||
.hypothesis/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
||||
# IPython Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# pyenv
|
||||
.python-version
|
||||
|
||||
# celery beat schedule file
|
||||
celerybeat-schedule
|
||||
|
||||
# dotenv
|
||||
.env
|
||||
|
||||
# virtualenv
|
||||
venv/
|
||||
ENV/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
### VirtualEnv template
|
||||
# Virtualenv
|
||||
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
|
||||
[Bb]in
|
||||
[Ii]nclude
|
||||
[Ll]ib
|
||||
[Ll]ib64
|
||||
[Ll]ocal
|
||||
[Ss]cripts
|
||||
pyvenv.cfg
|
||||
.venv
|
||||
pip-selfcheck.json
|
||||
|
||||
### JetBrains template
|
||||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
|
||||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
||||
|
||||
# User-specific stuff
|
||||
.idea/**/workspace.xml
|
||||
.idea/**/tasks.xml
|
||||
.idea/**/usage.statistics.xml
|
||||
.idea/**/dictionaries
|
||||
.idea/**/shelf
|
||||
|
||||
# AWS User-specific
|
||||
.idea/**/aws.xml
|
||||
|
||||
# Generated files
|
||||
.idea/**/contentModel.xml
|
||||
|
||||
# Sensitive or high-churn files
|
||||
.idea/**/dataSources/
|
||||
.idea/**/dataSources.ids
|
||||
.idea/**/dataSources.local.xml
|
||||
.idea/**/sqlDataSources.xml
|
||||
.idea/**/dynamic.xml
|
||||
.idea/**/uiDesigner.xml
|
||||
.idea/**/dbnavigator.xml
|
||||
|
||||
# Gradle
|
||||
.idea/**/gradle.xml
|
||||
.idea/**/libraries
|
||||
|
||||
# Gradle and Maven with auto-import
|
||||
# When using Gradle or Maven with auto-import, you should exclude module files,
|
||||
# since they will be recreated, and may cause churn. Uncomment if using
|
||||
# auto-import.
|
||||
# .idea/artifacts
|
||||
# .idea/compiler.xml
|
||||
# .idea/jarRepositories.xml
|
||||
# .idea/modules.xml
|
||||
# .idea/*.iml
|
||||
# .idea/modules
|
||||
# *.iml
|
||||
# *.ipr
|
||||
|
||||
# CMake
|
||||
cmake-build-*/
|
||||
|
||||
# Mongo Explorer plugin
|
||||
.idea/**/mongoSettings.xml
|
||||
|
||||
# File-based project format
|
||||
*.iws
|
||||
|
||||
# IntelliJ
|
||||
out/
|
||||
|
||||
# mpeltonen/sbt-idea plugin
|
||||
.idea_modules/
|
||||
|
||||
# JIRA plugin
|
||||
atlassian-ide-plugin.xml
|
||||
|
||||
# Cursive Clojure plugin
|
||||
.idea/replstate.xml
|
||||
|
||||
# SonarLint plugin
|
||||
.idea/sonarlint/
|
||||
|
||||
# Crashlytics plugin (for Android Studio and IntelliJ)
|
||||
com_crashlytics_export_strings.xml
|
||||
crashlytics.properties
|
||||
crashlytics-build.properties
|
||||
fabric.properties
|
||||
|
||||
# Editor-based Rest Client
|
||||
.idea/httpRequests
|
||||
|
||||
# Android studio 3.1+ serialized cache file
|
||||
.idea/caches/build_file_checksums.ser
|
||||
|
||||
# idea folder, uncomment if you don't need it
|
||||
# .idea
|
||||
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
||||
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/ranczo-energy-usage-scraper.iml" filepath="$PROJECT_DIR$/.idea/ranczo-energy-usage-scraper.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
||||
8
.idea/ranczo-energy-usage-scraper.iml
Normal file
8
.idea/ranczo-energy-usage-scraper.iml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
23
DistributionCost.py
Normal file
23
DistributionCost.py
Normal file
@ -0,0 +1,23 @@
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
import zoneinfo
|
||||
|
||||
from utils.time_helpers import WARSAW_TZ
|
||||
|
||||
@dataclass
|
||||
class DistributionCostBase:
|
||||
tz: zoneinfo.ZoneInfo = WARSAW_TZ
|
||||
|
||||
def to_local_dt(self, ts: datetime) -> datetime:
|
||||
if ts.tzinfo is None:
|
||||
return ts.replace(tzinfo=self.tz)
|
||||
return ts.astimezone(self.tz)
|
||||
|
||||
def rate(self, ts: datetime) -> float:
|
||||
"""Return PLN/kWh (netto) for given timestamp (override in subclasses)."""
|
||||
raise NotImplementedError
|
||||
|
||||
def cost(self, ts: datetime, consumption_kwh: float) -> float:
|
||||
return self.rate(ts) * float(consumption_kwh)
|
||||
|
||||
34
DistributionCostFactory.py
Normal file
34
DistributionCostFactory.py
Normal file
@ -0,0 +1,34 @@
|
||||
from __future__ import annotations
|
||||
import importlib
|
||||
from typing import Any, Type, cast
|
||||
|
||||
from DistributionCost import DistributionCostBase
|
||||
|
||||
def create(name: str, /, **kwargs: Any) -> DistributionCostBase:
|
||||
"""
|
||||
Instantiate provider by name using a simple convention:
|
||||
module: DistributionProvider.<Name>Provider
|
||||
class: <Name>Provider
|
||||
Example: create("TauronG13", rates=...)
|
||||
-> DistributionCostProvider.TauronG13Provider.TauronG13Provider(...)
|
||||
"""
|
||||
safe = "".join(ch for ch in name if ch.isalnum() or ch == "_")
|
||||
module_name = f"DistributionCostProvider.{safe}Provider"
|
||||
class_name = f"{safe}Provider"
|
||||
|
||||
try:
|
||||
mod = importlib.import_module(module_name)
|
||||
except ModuleNotFoundError as e:
|
||||
raise ValueError(f"Provider module '{module_name}' not found for name '{name}'.") from e
|
||||
|
||||
try:
|
||||
cls: Type = getattr(mod, class_name)
|
||||
except AttributeError as e:
|
||||
raise ValueError(f"Provider class '{class_name}' not found in '{module_name}'.") from e
|
||||
|
||||
# be sure that this is a subclass of DistributionCostBase
|
||||
if not issubclass(cls, DistributionCostBase):
|
||||
raise TypeError(f"{class_name} does not derive from DistributionCostBase")
|
||||
|
||||
ProviderCls = cast(type[DistributionCostBase], cls)
|
||||
return ProviderCls(**kwargs) # type: ignore[call-arg]
|
||||
14
DistributionCostProvider/TauronG11Provider.py
Normal file
14
DistributionCostProvider/TauronG11Provider.py
Normal file
@ -0,0 +1,14 @@
|
||||
from __future__ import annotations
|
||||
from datetime import datetime
|
||||
from DistributionCost import DistributionCostBase
|
||||
|
||||
from logging_utils import HasLogger
|
||||
|
||||
# ---------- TAURON G11 ----------
|
||||
class TauronG11Provider(DistributionCostBase, HasLogger):
|
||||
def __init__(self):
|
||||
self._init_logger()
|
||||
|
||||
def rate(self, ts: datetime) -> float:
|
||||
return 0.504
|
||||
|
||||
33
DistributionCostProvider/TauronG12Provider.py
Normal file
33
DistributionCostProvider/TauronG12Provider.py
Normal file
@ -0,0 +1,33 @@
|
||||
from __future__ import annotations
|
||||
from datetime import time, datetime
|
||||
from DistributionCost import DistributionCostBase
|
||||
from utils.time_helpers import in_range_local, TimeRange
|
||||
|
||||
|
||||
# ---------- TAURON G12 ----------
|
||||
class TauronG12Provider(DistributionCostBase):
|
||||
"""
|
||||
Dzień 0,3310 zł/kWh netto
|
||||
Noc 0,0994 zł/kWh netto
|
||||
"""
|
||||
low_1: TimeRange = (time(22,0), time(6,0)) # over midnight
|
||||
low_2: TimeRange = (time(13,0), time(15,0))
|
||||
high_1: TimeRange = (time(6,0), time(13,0))
|
||||
high_2: TimeRange = (time(15,0), time(22,0))
|
||||
|
||||
def __init__(self):
|
||||
self.rates={
|
||||
"high" : 0.3310, #
|
||||
"low" : 0.0994
|
||||
}
|
||||
|
||||
def rate(self, ts: datetime) -> float:
|
||||
dt = self.to_local_dt(ts)
|
||||
t = dt.time()
|
||||
|
||||
if in_range_local(t, self.low_1) or in_range_local(t, self.low_2):
|
||||
return self.rates["low"]
|
||||
if in_range_local(t, self.high_1) or in_range_local(t, self.high_2):
|
||||
return self.rates["high"]
|
||||
return self.rates["high"]
|
||||
|
||||
16
DistributionCostProvider/TauronG12WProvider.py
Normal file
16
DistributionCostProvider/TauronG12WProvider.py
Normal file
@ -0,0 +1,16 @@
|
||||
from __future__ import annotations
|
||||
from DistributionCostProvider.TauronG12Provider import TauronG12Provider
|
||||
from utils.calendar_pl import is_weekend_or_holiday
|
||||
|
||||
|
||||
# ---------- TAURON G12w ----------
|
||||
class TauronG12WProvider(TauronG12Provider):
|
||||
"""
|
||||
Like G12 on weekdays; whole weekends & holidays are 'low'.
|
||||
rates={'low':..., 'high':...}
|
||||
"""
|
||||
def rate(self, ts):
|
||||
dt = self.to_local_dt(ts)
|
||||
if is_weekend_or_holiday(dt.date()):
|
||||
return self.rates["low"]
|
||||
return super().rate(ts)
|
||||
68
DistributionCostProvider/TauronG13Provider.py
Normal file
68
DistributionCostProvider/TauronG13Provider.py
Normal file
@ -0,0 +1,68 @@
|
||||
from __future__ import annotations
|
||||
from datetime import time, date, datetime
|
||||
from DistributionCost import DistributionCostBase
|
||||
from utils.time_helpers import in_range_local, TimeRange
|
||||
from utils.calendar_pl import is_weekend_or_holiday
|
||||
|
||||
|
||||
# ---------- TAURON G13 ----------
|
||||
class TauronG13Provider(DistributionCostBase):
|
||||
"""
|
||||
Szczyt przedpołudniowy
|
||||
0,2826 zł/kWh brutto
|
||||
0,2298 zł/kWh netto
|
||||
Szczyt popołudniowy
|
||||
0,4645 zł/kWh brutto
|
||||
0,3777 zł/kWh netto
|
||||
Pozostałe godziny
|
||||
0,0911 zł/kWh brutto
|
||||
0,0741 zł/kWh netto
|
||||
"""
|
||||
winter_med: TimeRange = (time(7,0), time(13,0))
|
||||
winter_high: TimeRange = (time(16,0), time(21,0))
|
||||
winter_low_1: TimeRange = (time(13,0), time(16,0))
|
||||
winter_low_2: TimeRange = (time(21,0), time(7,0)) # over midnight
|
||||
|
||||
summer_med: TimeRange = (time(7,0), time(13,0))
|
||||
summer_high: TimeRange = (time(19,0), time(22,0))
|
||||
summer_low_1: TimeRange = (time(13,0), time(19,0))
|
||||
summer_low_2: TimeRange = (time(22,0), time(7,0)) # over midnight
|
||||
|
||||
def __init__(self):
|
||||
self.rates={
|
||||
"high" : 0.3777, # ~0,465 brutto
|
||||
"med" : 0.2298,
|
||||
"low" : 0.0741
|
||||
}
|
||||
|
||||
def _is_winter(self, d: date) -> bool:
|
||||
return d.month in (10, 11, 12, 1, 2, 3)
|
||||
|
||||
def rate(self, ts: datetime) -> float:
|
||||
dt = self.to_local_dt(ts)
|
||||
d, t = dt.date(), dt.time()
|
||||
|
||||
# weekend/holiday → always 'low'
|
||||
if is_weekend_or_holiday(d):
|
||||
return self.rates["low"]
|
||||
|
||||
if self._is_winter(d):
|
||||
if in_range_local(t, self.winter_high):
|
||||
key = "high"
|
||||
elif in_range_local(t, self.winter_med):
|
||||
key = "med"
|
||||
elif in_range_local(t, self.winter_low_1) or in_range_local(t, self.winter_low_2):
|
||||
key = "low"
|
||||
else:
|
||||
key = "low"
|
||||
else:
|
||||
if in_range_local(t, self.summer_high):
|
||||
key = "high"
|
||||
elif in_range_local(t, self.summer_med):
|
||||
key = "med"
|
||||
elif in_range_local(t, self.summer_low_1) or in_range_local(t, self.summer_low_2):
|
||||
key = "low"
|
||||
else:
|
||||
key = "low"
|
||||
|
||||
return self.rates[key]
|
||||
91
DistributionCostProvider/TauronG13SProvider.py
Normal file
91
DistributionCostProvider/TauronG13SProvider.py
Normal file
@ -0,0 +1,91 @@
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from datetime import time, datetime
|
||||
from typing import Dict, Literal
|
||||
|
||||
from DistributionCost import DistributionCostBase
|
||||
from utils.time_helpers import in_range_local
|
||||
from utils.calendar_pl import is_weekend_or_holiday
|
||||
|
||||
Season = Literal["summer", "winter"]
|
||||
DayType = Literal["workday", "dayoff"]
|
||||
Band = Literal["peak", "offpeak_day", "night"]
|
||||
|
||||
RatesDict = Dict[Season, Dict[DayType, Dict[Band, float]]]
|
||||
@dataclass
|
||||
class TauronG13SProvider(DistributionCostBase):
|
||||
"""
|
||||
TAURON Dystrybucja – taryfa G13s (od 1 lipca 2025).
|
||||
|
||||
Strefy czasowe (Europe/Warsaw):
|
||||
* Noc: 21:00–07:00 (cały rok)
|
||||
* Lato: 07:00–09:00 (szczyt), 09:00–17:00 (pozaszczyt), 17:00–21:00 (szczyt)
|
||||
* Zima: 07:00–10:00 (szczyt), 10:00–15:00 (pozaszczyt), 15:00–21:00 (szczyt)
|
||||
|
||||
Ceny różnią się także rodzajem dnia:
|
||||
- workday = pon–pt z wyłączeniem świąt,
|
||||
- dayoff = sobota, niedziela i dni ustawowo wolne od pracy.
|
||||
|
||||
Oczekuje stawek NETTO (PLN/kWh) przekazanych w `rates` według struktury RatesDict.
|
||||
"""
|
||||
rates: RatesDict = None # wstrzykuj w konstruktorze
|
||||
|
||||
# stałe zakresy godzin
|
||||
NIGHT: tuple[time, time] = (time(21, 0), time(7, 0)) # przez północ
|
||||
|
||||
SUMMER_PEAK_1: tuple[time, time] = (time(7, 0), time(9, 0))
|
||||
SUMMER_OFFDAY: tuple[time, time] = (time(9, 0), time(17, 0))
|
||||
SUMMER_PEAK_2: tuple[time, time] = (time(17, 0), time(21, 0))
|
||||
|
||||
WINTER_PEAK_1: tuple[time, time] = (time(7, 0), time(10, 0))
|
||||
WINTER_OFFDAY: tuple[time, time] = (time(10, 0), time(15, 0))
|
||||
WINTER_PEAK_2: tuple[time, time] = (time(15, 0), time(21, 0))
|
||||
|
||||
def __init__(self):
|
||||
self.rates= {
|
||||
"winter":
|
||||
{
|
||||
"dayoff": { "peak" : 0.20, "offpeak_day" : 0.12, "night" : 0.11 },
|
||||
"workday":{ "peak" : 0.24, "offpeak_day" : 0.2, "night" : 0.11 }
|
||||
},
|
||||
"summer":
|
||||
{
|
||||
"dayoff": { "peak": 0.12, "offpeak_day": 0.04, "night": 0.11},
|
||||
"workday": { "peak": 0.29, "offpeak_day": 0.1, "night": 0.11 }
|
||||
}
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _season(d: datetime.date) -> Season:
|
||||
return "summer" if 4 <= d.month <= 9 else "winter"
|
||||
|
||||
@staticmethod
|
||||
def _daytype(d: datetime.date) -> DayType:
|
||||
return "dayoff" if is_weekend_or_holiday(d) else "workday"
|
||||
|
||||
def _band(self, t: time, season: Season) -> Band:
|
||||
if in_range_local(t, self.NIGHT):
|
||||
return "night"
|
||||
|
||||
if season == "summer":
|
||||
if in_range_local(t, self.SUMMER_PEAK_1) or in_range_local(t, self.SUMMER_PEAK_2):
|
||||
return "peak"
|
||||
if in_range_local(t, self.SUMMER_OFFDAY):
|
||||
return "offpeak_day"
|
||||
else: # winter
|
||||
if in_range_local(t, self.WINTER_PEAK_1) or in_range_local(t, self.WINTER_PEAK_2):
|
||||
return "peak"
|
||||
if in_range_local(t, self.WINTER_OFFDAY):
|
||||
return "offpeak_day"
|
||||
|
||||
return "night"
|
||||
|
||||
def rate(self, ts: datetime) -> float:
|
||||
dt = self.to_local_dt(ts)
|
||||
season = self._season(dt.date())
|
||||
daytype = self._daytype(dt.date())
|
||||
band = self._band(dt.time(), season)
|
||||
try:
|
||||
return self.rates[season][daytype][band]
|
||||
except KeyError as e:
|
||||
raise KeyError(f"no price for [{season}][{daytype}][{band}]") from e
|
||||
1
DistributionCostProvider/__init__.py
Normal file
1
DistributionCostProvider/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# empty (kept for package import); discovery is done by the factory via importlib
|
||||
29
EnergyPrice.py
Normal file
29
EnergyPrice.py
Normal file
@ -0,0 +1,29 @@
|
||||
# EnergyPrice.py
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
import zoneinfo
|
||||
from utils.time_helpers import WARSAW_TZ
|
||||
|
||||
@dataclass
|
||||
class EnergyPriceBase:
|
||||
tz: zoneinfo.ZoneInfo = WARSAW_TZ
|
||||
|
||||
def to_local_dt(self, ts: datetime) -> datetime:
|
||||
if ts.tzinfo is None:
|
||||
return ts.replace(tzinfo=self.tz)
|
||||
return ts.astimezone(self.tz)
|
||||
|
||||
def has_rate(self, ts: datetime) -> bool:
|
||||
try:
|
||||
self.rate(ts)
|
||||
return True
|
||||
except KeyError:
|
||||
return False
|
||||
|
||||
def rate(self, ts: datetime) -> float:
|
||||
"""Return PLN/kWh (net) for given timestamp (override in subclasses)."""
|
||||
raise NotImplementedError
|
||||
|
||||
def cost(self, ts: datetime, consumption_kwh: float) -> float:
|
||||
return self.rate(ts) * float(consumption_kwh)
|
||||
32
EnergyPriceFactory.py
Normal file
32
EnergyPriceFactory.py
Normal file
@ -0,0 +1,32 @@
|
||||
# EnergyPriceFactory.py
|
||||
from __future__ import annotations
|
||||
import importlib
|
||||
from typing import Any, cast, Type
|
||||
from EnergyPrice import EnergyPriceBase
|
||||
|
||||
def create(name: str, /, **kwargs: Any) -> EnergyPriceBase:
|
||||
"""
|
||||
Convention:
|
||||
module: EnergyPriceProvider.<Name>Provider
|
||||
class: <Name>Provider
|
||||
Example: create("TauronG13", rates={...})
|
||||
"""
|
||||
safe = "".join(ch for ch in name if ch.isalnum() or ch == "_")
|
||||
module_name = f"EnergyPriceProvider.{safe}Provider"
|
||||
class_name = f"{safe}Provider"
|
||||
|
||||
try:
|
||||
mod = importlib.import_module(module_name)
|
||||
except ModuleNotFoundError as e:
|
||||
raise ValueError(f"Provider module not found: {module_name}") from e
|
||||
|
||||
try:
|
||||
cls = getattr(mod, class_name)
|
||||
except AttributeError as e:
|
||||
raise ValueError(f"Provider class not found: {class_name} in {module_name}") from e
|
||||
|
||||
if not issubclass(cls, EnergyPriceBase):
|
||||
raise TypeError(f"{class_name} must inherit EnergyCostBase")
|
||||
|
||||
ProviderCls = cast(Type[EnergyPriceBase], cls)
|
||||
return ProviderCls(**kwargs) # type: ignore[arg-type]
|
||||
113
EnergyPriceProvider/DynamicPricesProvider.py
Normal file
113
EnergyPriceProvider/DynamicPricesProvider.py
Normal file
@ -0,0 +1,113 @@
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Tuple, Optional
|
||||
from zoneinfo import ZoneInfo
|
||||
import psycopg
|
||||
from EnergyPrice import EnergyPriceBase
|
||||
|
||||
WAW = ZoneInfo("Europe/Warsaw")
|
||||
Interval = Tuple[datetime, datetime, float] # (ts_start, ts_end, price_pln_net)
|
||||
|
||||
@dataclass
|
||||
class DynamicPricesProvider(EnergyPriceBase):
|
||||
"""
|
||||
Bazowa klasa dla cen dynamicznych trzymanych w pricing.energy_prices (Timescale).
|
||||
- rate(ts): zwraca cenę (PLN/kWh netto) dla podanej chwili.
|
||||
Przy pierwszym wywołaniu dla danej doby – ładuje CAŁĄ dobę do cache.
|
||||
- has_rate(ts): domyślnie True (możesz nadpisać w podklasie, jeśli chcesz sprawdzać realną obecność danych).
|
||||
- Subklasy definiują PROVIDER i KIND (np. 'TGE' / 'fixing_I').
|
||||
"""
|
||||
dsn: Optional[str] = None # np. "postgresql://user:pass@localhost:5432/postgres"
|
||||
conn: Optional[psycopg.Connection] = None # albo podaj gotowe połączenie psycopg3
|
||||
table: str = "pricing.energy_prices"
|
||||
tz: ZoneInfo = WAW
|
||||
max_cached_days: int = 14 # ile różnych dób trzymać w cache
|
||||
|
||||
# identyfikatory – nadpisujesz w podklasie
|
||||
SIDE: str = "" # 'buy' albo 'sell'
|
||||
PROVIDER: str = ""
|
||||
KIND: str = ""
|
||||
|
||||
# prosty cache: klucz = początek doby (local), wartość = lista interwałów (start, end, price)
|
||||
_cache: Dict[datetime, List[Interval]] = field(default_factory=dict, init=False, repr=False)
|
||||
_cache_order: List[datetime] = field(default_factory=list, init=False, repr=False)
|
||||
|
||||
# ---------- public API ----------
|
||||
def provider(self) -> str:
|
||||
if not self.PROVIDER:
|
||||
raise NotImplementedError("Subclass must define PROVIDER")
|
||||
return self.PROVIDER
|
||||
|
||||
def kind(self) -> str:
|
||||
if not self.KIND:
|
||||
raise NotImplementedError("Subclass must define KIND")
|
||||
return self.KIND
|
||||
|
||||
def side(self) -> str:
|
||||
if not self.SIDE:
|
||||
raise NotImplementedError("Subclass must define SIDE")
|
||||
return self.SIDE
|
||||
|
||||
def rate(self, ts: datetime) -> float:
|
||||
"""Zwraca cenę netto PLN/kWh dla chwili ts. Ładuje cały dzień do cache przy pierwszym wywołaniu."""
|
||||
dt = self._to_local(ts)
|
||||
day_key = self._day_key(dt)
|
||||
self._ensure_day_cached(day_key)
|
||||
for start, end, price in self._cache.get(day_key, []):
|
||||
if start <= dt < end:
|
||||
return price
|
||||
raise KeyError(f"No price for {dt.isoformat()} (provider={self.provider()}, kind={self.kind()})")
|
||||
|
||||
def preload_day(self, day: datetime | None = None):
|
||||
"""Opcjonalnie: prefetch doby (początek dnia lokalnie)."""
|
||||
day_key = self._day_key(self._to_local(day or datetime.now(tz=self.tz)))
|
||||
self._ensure_day_cached(day_key)
|
||||
|
||||
def clear_cache(self):
|
||||
self._cache.clear()
|
||||
self._cache_order.clear()
|
||||
|
||||
# ---------- internals ----------
|
||||
def _to_local(self, ts: datetime) -> datetime:
|
||||
return ts.replace(tzinfo=self.tz) if ts.tzinfo is None else ts.astimezone(self.tz)
|
||||
|
||||
def _day_key(self, ts_local: datetime) -> datetime:
|
||||
return ts_local.replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
|
||||
def _ensure_conn(self) -> psycopg.Connection:
|
||||
if self.conn is not None:
|
||||
return self.conn
|
||||
if not self.dsn:
|
||||
raise RuntimeError("Provide dsn= or conn= to DynamicPricesProvider")
|
||||
self.conn = psycopg.connect(self.dsn)
|
||||
return self.conn
|
||||
|
||||
def _ensure_day_cached(self, day_start_local: datetime):
|
||||
if day_start_local in self._cache:
|
||||
return
|
||||
self._cache[day_start_local] = self._fetch_day(day_start_local)
|
||||
self._cache_order.append(day_start_local)
|
||||
# prosty limit cache po liczbie dni
|
||||
while len(self._cache_order) > self.max_cached_days:
|
||||
oldest = self._cache_order.pop(0)
|
||||
self._cache.pop(oldest, None)
|
||||
|
||||
def _fetch_day(self, day_start_local: datetime) -> List[Interval]:
|
||||
"""Pobiera z DB wszystkie rekordy nachodzące na [day_start, day_start+1d)."""
|
||||
day_end_local = day_start_local + timedelta(days=1)
|
||||
sql = f"""
|
||||
SELECT ts_start, ts_end, price_pln_net
|
||||
FROM {self.table}
|
||||
WHERE provider = %s
|
||||
AND kind = %s
|
||||
AND side = %s
|
||||
AND tstzrange(ts_start, ts_end, '[)') && tstzrange(%s::timestamptz, %s::timestamptz, '[)')
|
||||
ORDER BY ts_start
|
||||
"""
|
||||
|
||||
with self._ensure_conn().cursor() as cur:
|
||||
cur.execute(sql, (self.provider(), self.kind(), self.side(), day_start_local, day_end_local))
|
||||
rows = cur.fetchall()
|
||||
# rows: List[Tuple[datetime, datetime, Decimal]]
|
||||
return [(r[0], r[1], float(r[2])) for r in rows]
|
||||
38
EnergyPriceProvider/RDNProvider.py
Normal file
38
EnergyPriceProvider/RDNProvider.py
Normal file
@ -0,0 +1,38 @@
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Callable, Mapping, Optional
|
||||
from EnergyPrice import EnergyPriceBase
|
||||
|
||||
PriceLookup = Callable[[datetime], Optional[float]]
|
||||
|
||||
@dataclass
|
||||
class RDNProvider(EnergyPriceBase):
|
||||
"""
|
||||
Skeleton for market-based energy cost (RDN).
|
||||
Choose ONE feeding strategy:
|
||||
1) price_lookup: callable(ts_local) -> PLN/kWh (net) or None
|
||||
2) mapping: dict/Mapping[datetime->PLN/kWh] with tz-aware keys (Europe/Warsaw)
|
||||
Optional: period controls how you round/align (default: 1h).
|
||||
"""
|
||||
price_lookup: PriceLookup | None = None
|
||||
mapping: Mapping[datetime, float] | None = None
|
||||
period: timedelta = timedelta(hours=1)
|
||||
|
||||
def _get_price(self, ts_local: datetime) -> float:
|
||||
if self.price_lookup:
|
||||
val = self.price_lookup(ts_local)
|
||||
if val is None:
|
||||
raise KeyError(f"No RDN price for {ts_local.isoformat()}")
|
||||
return float(val)
|
||||
if self.mapping is not None:
|
||||
# align to exact period start (e.g., hour start)
|
||||
aligned = ts_local.replace(minute=0, second=0, microsecond=0)
|
||||
if aligned in self.mapping:
|
||||
return float(self.mapping[aligned])
|
||||
raise KeyError(f"No RDN price in mapping for {aligned.isoformat()}")
|
||||
raise RuntimeError("RDNProvider needs price_lookup or mapping")
|
||||
|
||||
def rate(self, ts: datetime) -> float:
|
||||
dt = self.to_local_dt(ts)
|
||||
return self._get_price(dt)
|
||||
13
EnergyPriceProvider/TauronG11Provider.py
Normal file
13
EnergyPriceProvider/TauronG11Provider.py
Normal file
@ -0,0 +1,13 @@
|
||||
from __future__ import annotations
|
||||
from EnergyPrice import EnergyPriceBase
|
||||
from utils.calendar_pl import gov_energy_prices_shield
|
||||
|
||||
class TauronG11Provider(EnergyPriceBase):
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
|
||||
def rate(self, ts):
|
||||
if gov_energy_prices_shield(ts):
|
||||
return 0.62/1.23
|
||||
else:
|
||||
raise RuntimeError("brak danych dla nie tarczy, trzeba ogarnąć")
|
||||
33
EnergyPriceProvider/TauronG12Provider.py
Normal file
33
EnergyPriceProvider/TauronG12Provider.py
Normal file
@ -0,0 +1,33 @@
|
||||
from __future__ import annotations
|
||||
from datetime import time, datetime
|
||||
from EnergyPrice import EnergyPriceBase
|
||||
from utils.time_helpers import in_range_local
|
||||
from utils.calendar_pl import gov_energy_prices_shield
|
||||
|
||||
class TauronG12Provider(EnergyPriceBase):
|
||||
low1 = (time(22,0), time(6,0))
|
||||
low2 = (time(13,0), time(15,0))
|
||||
high1 = (time(6,0), time(13,0))
|
||||
high2 = (time(15,0), time(22,0))
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
|
||||
@staticmethod
|
||||
def _rates(ts: datetime):
|
||||
if gov_energy_prices_shield(ts):
|
||||
tarcza_rates = 0.62 / 1.23
|
||||
return {
|
||||
"low": 0.57/1.23, "high": tarcza_rates
|
||||
}
|
||||
else:
|
||||
raise RuntimeError("brak danych dla nie tarczy, trzeba ogarnąć")
|
||||
|
||||
def rate(self, ts):
|
||||
rates = self._rates(ts)
|
||||
t = self.to_local_dt(ts).time()
|
||||
if in_range_local(t, self.low1) or in_range_local(t, self.low2):
|
||||
return rates["low"]
|
||||
if in_range_local(t, self.high1) or in_range_local(t, self.high2):
|
||||
return rates["high"]
|
||||
return rates["high"]
|
||||
14
EnergyPriceProvider/TauronG12WProvider.py
Normal file
14
EnergyPriceProvider/TauronG12WProvider.py
Normal file
@ -0,0 +1,14 @@
|
||||
from __future__ import annotations
|
||||
from EnergyPriceProvider.TauronG12Provider import TauronG12Provider
|
||||
from utils.calendar_pl import is_weekend_or_holiday
|
||||
|
||||
class TauronG12WProvider(TauronG12Provider):
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
|
||||
def rate(self, ts):
|
||||
dt = self.to_local_dt(ts)
|
||||
if is_weekend_or_holiday(dt.date()):
|
||||
rates = self._rates(ts)
|
||||
return rates["low"]
|
||||
return super().rate(ts)
|
||||
72
EnergyPriceProvider/TauronG13Provider.py
Normal file
72
EnergyPriceProvider/TauronG13Provider.py
Normal file
@ -0,0 +1,72 @@
|
||||
from __future__ import annotations
|
||||
from datetime import time, datetime
|
||||
from EnergyPrice import EnergyPriceBase
|
||||
from utils.time_helpers import in_range_local
|
||||
from utils.calendar_pl import is_weekend_or_holiday, gov_energy_prices_shield
|
||||
|
||||
class TauronG13Provider(EnergyPriceBase):
|
||||
"""
|
||||
Energy cost – Tauron G13 (NET PLN/kWh).
|
||||
Zima (X–III):
|
||||
medium: 07–13
|
||||
high: 16–21
|
||||
low: 13–16, 21–07; weekend/święta = low
|
||||
Lato (IV–IX):
|
||||
medium: 07–13
|
||||
high: 19–22
|
||||
low: 13–19, 22–07; weekend/święta = low
|
||||
"""
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
|
||||
self.w_med = (time(7,0), time(13,0))
|
||||
self.w_high = (time(16,0), time(21,0))
|
||||
self.w_low1 = (time(13,0), time(16,0))
|
||||
self.w_low2 = (time(21,0), time(7,0))
|
||||
|
||||
self.s_med = (time(7,0), time(13,0))
|
||||
self.s_high = (time(19,0), time(22,0))
|
||||
self.s_low1 = (time(13,0), time(19,0))
|
||||
self.s_low2 = (time(22,0), time(7,0))
|
||||
|
||||
@staticmethod
|
||||
def _rates(ts: datetime):
|
||||
if gov_energy_prices_shield(ts):
|
||||
tarcza_rates = 0.62/1.23
|
||||
return {
|
||||
"summer": { # IV–IX
|
||||
"workday": {"low": tarcza_rates, "medium": tarcza_rates, "high": tarcza_rates},
|
||||
"dayoff": {"low": tarcza_rates, "medium": tarcza_rates, "high": tarcza_rates},
|
||||
},
|
||||
"winter": { # X–III
|
||||
"workday": {"low": tarcza_rates, "medium": tarcza_rates, "high": tarcza_rates},
|
||||
"dayoff": {"low": tarcza_rates, "medium": tarcza_rates, "high": tarcza_rates},
|
||||
},
|
||||
}
|
||||
else:
|
||||
raise RuntimeError("brak danych dla nie tarczy, trzeba ogarnąć")
|
||||
|
||||
@staticmethod
|
||||
def _is_winter(month: int) -> bool:
|
||||
return month in (10, 11, 12, 1, 2, 3)
|
||||
|
||||
def rate(self, ts):
|
||||
dt = self.to_local_dt(ts)
|
||||
d, t = dt.date(), dt.time()
|
||||
rates = self._rates(ts)
|
||||
|
||||
if is_weekend_or_holiday(d):
|
||||
return rates["low"]
|
||||
|
||||
if self._is_winter(d.month):
|
||||
if in_range_local(t, self.w_high): key = "high"
|
||||
elif in_range_local(t, self.w_med): key = "medium"
|
||||
elif in_range_local(t, self.w_low1) or in_range_local(t, self.w_low2): key = "low"
|
||||
else: key = "low"
|
||||
else:
|
||||
if in_range_local(t, self.s_high): key = "high"
|
||||
elif in_range_local(t, self.s_med): key = "medium"
|
||||
elif in_range_local(t, self.s_low1) or in_range_local(t, self.s_low2): key = "low"
|
||||
else: key = "low"
|
||||
|
||||
return rates[key]
|
||||
1
EnergyPriceProvider/__init__.py
Normal file
1
EnergyPriceProvider/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# (left intentionally empty; factory imports modules dynamically)
|
||||
163
EnergyPriceScraper.py
Normal file
163
EnergyPriceScraper.py
Normal file
@ -0,0 +1,163 @@
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, date, timedelta, timezone
|
||||
from typing import Iterable, List, Tuple, Dict, Any, Optional
|
||||
from zoneinfo import ZoneInfo
|
||||
import json
|
||||
import psycopg
|
||||
from utils.time_helpers import WARSAW_TZ
|
||||
|
||||
# One row destined for pricing.energy_prices — field order matches the VALUES
# list in UPSERT_SQL below:
# (ts_start, ts_end, price_pln_net, provider, kind, side, buyer, seller, source_meta)
IntervalRow = Tuple[datetime, datetime, float, str, str, str, str, str, str]  # see _rows_to_upsert


# Idempotent upsert keyed on (ts_start, ts_end, provider, kind, side).
# The WHERE clause skips the UPDATE (and thus the RETURNING row) when nothing
# would actually change, so callers can count real modifications.
# source_meta is merged (jsonb ||) rather than replaced.
UPSERT_SQL = """
INSERT INTO pricing.energy_prices
    (ts_start, ts_end, price_pln_net, provider, kind, side, buyer, seller, source_meta)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)
ON CONFLICT (ts_start, ts_end, provider, kind, side)
DO UPDATE SET
    price_pln_net = EXCLUDED.price_pln_net,
    buyer = EXCLUDED.buyer,
    seller = EXCLUDED.seller,
    source_meta = COALESCE(pricing.energy_prices.source_meta, '{}'::jsonb)
        || COALESCE(EXCLUDED.source_meta, '{}'::jsonb),
    inserted_at = now()
WHERE
    pricing.energy_prices.price_pln_net IS DISTINCT FROM EXCLUDED.price_pln_net
    OR pricing.energy_prices.buyer IS DISTINCT FROM EXCLUDED.buyer
    OR pricing.energy_prices.seller IS DISTINCT FROM EXCLUDED.seller
    OR (COALESCE(pricing.energy_prices.source_meta, '{}'::jsonb)
        || COALESCE(EXCLUDED.source_meta, '{}'::jsonb))
    IS DISTINCT FROM COALESCE(pricing.energy_prices.source_meta, '{}'::jsonb)
RETURNING 1 AS affected, (xmax <> 0) AS was_update;"""


# Newest interval end for one scraper identity (provider/kind/side/buyer/seller).
SELECT_LAST = """
select ts_end from pricing.energy_prices ep
where
    ep.provider = %s
    and ep.kind = %s
    and ep.side = %s::price_side
    and ep.buyer = %s
    and ep.seller = %s
order by ts_start desc
limit 1
"""
|
||||
|
||||
@dataclass
class EnergyPriceScraperBase:
    """Base class for market price scrapers (fetch → normalize → UPSERT).

    Subclasses override the PROVIDER/KIND/SIDE/BUYER/SELLER identifiers and
    implement :meth:`fetch_day`; connection handling, row normalization and
    the idempotent upsert live here.
    """
    dsn: Optional[str] = None                   # DSN used to open a connection lazily
    conn: Optional[psycopg.Connection] = None   # or an already-open connection
    tz: ZoneInfo = WARSAW_TZ                    # local timezone defining a "business day"
    period: timedelta = timedelta(hours=1)      # length of one price interval
    # identifiers — OVERRIDE in the subclass:
    PROVIDER: str = ""  # e.g. 'PSE' / 'instrat' / 'PSTRYK'
    KIND: str = ""      # e.g. 'rce' / 'fixing_I' / 'market_price'
    SIDE: str = ""      # 'buy'|'sell'
    BUYER: str = ""     # 'end_user'
    SELLER: str = ""    # 'market_index'
    log = None          # logger adapter; expected to be provided by the subclass

    # throttling/retry
    max_retries: int = 3
    backoff_sec: float = 1.0

    # ---------- identifier accessors ----------
    def provider(self) -> str:
        """Return PROVIDER, refusing to run with an unset identifier."""
        if not self.PROVIDER:
            raise NotImplementedError("Subclass must define PROVIDER")
        return self.PROVIDER

    def kind(self) -> str:
        """Return KIND, refusing to run with an unset identifier."""
        if not self.KIND:
            raise NotImplementedError("Subclass must define KIND")
        return self.KIND

    def side(self) -> str:
        """Return SIDE, refusing to run with an unset identifier."""
        if not self.SIDE:
            raise NotImplementedError("Subclass must define SIDE")
        return self.SIDE

    def buyer(self) -> str:
        """Return BUYER, refusing to run with an unset identifier."""
        if not self.BUYER:
            raise NotImplementedError("Subclass must define BUYER")
        return self.BUYER

    def seller(self) -> str:
        """Return SELLER, refusing to run with an unset identifier."""
        if not self.SELLER:
            raise NotImplementedError("Subclass must define SELLER")
        return self.SELLER

    # ---------- public API ----------
    def ingest_day(self, business_day: date) -> int:
        """Fetch and store one whole local day [00:00, 24:00). Returns rows upserted."""
        self.log.info("Ingesting day {}".format(business_day))
        points = self.fetch_day(business_day, WARSAW_TZ)
        rows = self._rows_to_upsert(points)
        # FIX: the previous message claimed rows were already inserted here.
        self.log.debug("{} rows prepared for upsert".format(len(rows)))
        return self._upsert(rows)

    def last_entry(self) -> Optional[datetime]:
        """Return ts_end of the newest stored interval for this scraper's identity, or None."""
        self.log.info("Retrieving last entry")
        with self._ensure_conn().cursor() as cur:
            params = (self.provider(), self.kind(), self.side(), self.buyer(), self.seller())
            cur.execute(SELECT_LAST, params)
            res = cur.fetchone()  # None when no matching rows exist
            if res is not None:
                self.log.debug("last entry {}".format(res))
                return res[0]

        self.log.debug("No last entry")
        return None

    def fetch_day(self, business_day: date, tz: timezone) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """Implement in the subclass. Return points priced in net PLN/kWh."""
        raise NotImplementedError

    # ---------- helpers ----------
    def _ensure_conn(self) -> psycopg.Connection:
        """Return the open connection, creating one from the DSN on first use."""
        if self.conn:
            return self.conn
        if not self.dsn:
            # FIX: the previous message referred to the wrong class name.
            raise RuntimeError("Provide dsn= or conn= for EnergyPriceScraperBase")
        self.conn = psycopg.connect(self.dsn)
        return self.conn

    def _rows_to_upsert(self, points: Iterable[Tuple[datetime, datetime, float, Dict[str, Any]]]) -> List[IntervalRow]:
        """Normalize fetched points into UPSERT parameter tuples."""
        # FIX: materialize first — len() below raised TypeError on a bare iterator.
        point_list = list(points)
        self.log.debug("Creating upsert rows from {} points".format(len(point_list)))
        rows: List[IntervalRow] = []
        for ts_start, ts_end, price_pln_kwh_net, meta in point_list:
            # Explicit check instead of assert (stripped under -O): naive
            # timestamps would be stored against the session timezone and
            # silently mis-file the data.
            if ts_start.tzname() is None or ts_end.tzname() is None:
                raise ValueError("interval timestamps must be timezone-aware")

            rows.append((
                ts_start, ts_end, float(price_pln_kwh_net),
                self.provider(), self.kind(), self.side(), self.buyer(), self.seller(),
                json.dumps(meta or {})
            ))
        return rows

    def _upsert(self, rows: list[IntervalRow]) -> int:
        """Upsert rows one by one (RETURNING is per-statement); return the affected count."""
        if not rows:
            return 0

        affected = 0
        updated = 0

        self.log.info("Upserting %d rows ", len(rows))
        with self._ensure_conn().cursor() as cur:
            for r in rows:
                cur.execute(UPSERT_SQL, r)
                res = cur.fetchone()  # None if no change, tuple if insert/update
                if res is not None:
                    affected += res[0]            # res[0] == 1
                    updated += int(bool(res[1]))  # was_update -> 1/0

        self._ensure_conn().commit()
        self.log.info("Affected=%d (updated=%d, inserted=%d)", affected, updated, affected - updated)
        return affected

    def _day_range(self, d: date) -> Tuple[datetime, datetime]:
        """Return [local midnight of d, local midnight of d+1) in self.tz."""
        start = datetime(d.year, d.month, d.day, 0, 0, tzinfo=self.tz)
        return start, start + timedelta(days=1)
|
||||
|
||||
48
EnergyPriceScraperFactory.py
Normal file
48
EnergyPriceScraperFactory.py
Normal file
@ -0,0 +1,48 @@
|
||||
# EnergyPriceScraperFactory.py
|
||||
from __future__ import annotations
|
||||
import importlib
|
||||
from typing import Any, cast, Type
|
||||
from EnergyPriceScraper import EnergyPriceScraperBase
|
||||
|
||||
import os
|
||||
import psycopg
|
||||
|
||||
# Connection settings come from the standard PG* environment variables,
# falling back to the values below.
DB_HOST = os.getenv("PGHOST", "192.168.30.10")
DB_PORT = int(os.getenv("PGPORT", "5432"))
DB_NAME = os.getenv("PGDATABASE", "postgres")
DB_USER = os.getenv("PGUSER", "energy_ingest")
# SECURITY: a real password is committed here as the fallback default.
# Rotate this credential and drop the default so a missing PGPASSWORD
# fails fast instead of silently using the leaked value.
DB_PASS = os.getenv("PGPASSWORD", "2f1rLCa03mQrbmlCbD6envk")


def setup_db():
    """Open and return a new psycopg 3 connection using the PG* env settings."""
    # psycopg 3
    conn = psycopg.connect(
        host=DB_HOST, port=DB_PORT, dbname=DB_NAME, user=DB_USER, password=DB_PASS
    )
    return conn
|
||||
|
||||
def create(name: str, /, **kwargs: Any) -> EnergyPriceScraperBase:
    """Instantiate a scraper by naming convention.

    Convention:
        module: Scraper.<Name>Scraper
        class:  <Name>Scraper
    Example: create("PSE_RCE", conn=conn)

    Raises:
        ValueError: when the module or class cannot be resolved for *name*.
        TypeError: when the resolved class does not inherit EnergyPriceScraperBase.
    """
    # Strip anything that could alter the import path (dots, slashes, ...).
    safe = "".join(ch for ch in name if ch.isalnum() or ch == "_")
    module_name = f"Scraper.{safe}Scraper"
    class_name = f"{safe}Scraper"

    try:
        mod = importlib.import_module(module_name)
    except ModuleNotFoundError as e:
        raise ValueError(f"Scraper module not found: {module_name}") from e

    try:
        cls = getattr(mod, class_name)
    except AttributeError as e:
        raise ValueError(f"Scraper class not found: {class_name} in {module_name}") from e

    # FIX: the error message previously named a non-existent "PriceScraperBase".
    if not issubclass(cls, EnergyPriceScraperBase):
        raise TypeError(f"{class_name} must inherit EnergyPriceScraperBase")

    ProviderCls = cast(Type[EnergyPriceScraperBase], cls)
    return ProviderCls(**kwargs)  # type: ignore[arg-type]
|
||||
19
README.md
Normal file
19
README.md
Normal file
@ -0,0 +1,19 @@
|
||||
sudo apt update
|
||||
sudo apt install -y python3-venv python3-pip git tzdata
|
||||
|
||||
# system user without shell login
|
||||
sudo useradd -r -s /usr/sbin/nologin -d /opt/energy-scrapers energy
|
||||
|
||||
Copy the service file to:
/etc/systemd/system/energy-price-scrapers.service
|
||||
|
||||
sudo install -m 0755 os/energy-price-scrapers-update.sh /usr/local/bin/energy-scrapers-update
|
||||
|
||||
As root:
|
||||
systemctl daemon-reload
|
||||
systemctl enable --now energy-price-scrapers.service
|
||||
systemctl status energy-price-scrapers.service
|
||||
Logs:

journalctl -u energy-price-scrapers.service -f
|
||||
78
Scraper/InstratRDN_CSVScraper.py
Normal file
78
Scraper/InstratRDN_CSVScraper.py
Normal file
@ -0,0 +1,78 @@
|
||||
from __future__ import annotations
|
||||
from datetime import datetime, timedelta, date
|
||||
from typing import List, Tuple, Dict, Any
|
||||
import pandas as pd
|
||||
from EnergyPriceScraper import EnergyPriceScraperBase
|
||||
from utils.time_helpers import WARSAW_TZ, UTC
|
||||
|
||||
# CSV downloaded from https://energy.instrat.pl/ceny/energia-rdn-godzinowe/
class InstratRDN_CSVScraper(EnergyPriceScraperBase):
    """
    Example RDN (day-ahead) scraper backed by a local Instrat CSV export.

    Expects a CSV with columns:
        date,fixing_i_price,fixing_i_volume,fixing_ii_price,fixing_ii_volume
    with prices in PLN/MWh; they are converted to net PLN/kWh.
    """

    def __init__(self, path: str, **kwargs):
        super().__init__(**kwargs)

        self.PROVIDER = "instrat"
        self.KIND = "fixing_I"
        self.SIDE = "buy"
        self.BUYER = "end_user"  # the retailer settling the prosumer
        self.SELLER = "market_index"

        # FIX: load_instrat_csv previously returned None despite its
        # annotation, leaving self.data = None.
        self.data = self.load_instrat_csv(path)

    def load_instrat_csv(self, path: str) -> pd.DataFrame:
        """
        Load an Instrat CSV of the form

            date,fixing_i_price,fixing_i_volume,fixing_ii_price,fixing_ii_volume
            01.01.2016 00:00,108.27,2565.10,108.55,89.10

        Builds a tz-aware, date-indexed frame with PLN/kWh prices, stores it
        as ``self.out`` and returns it.
        """
        df = pd.read_csv(path)
        # FIX: pd.read_csv's date_parser= keyword was removed in pandas 2.x;
        # parse the Polish date format explicitly and attach the local tz.
        df["date"] = df["date"].map(
            lambda x: datetime.strptime(x, '%d.%m.%Y %H:%M').replace(tzinfo=WARSAW_TZ)
        )

        # PLN/MWh -> PLN/kWh
        fi_pln_kwh = (df["fixing_i_price"] / 1000.0).round(4)
        fii_pln_kwh = (df["fixing_ii_price"] / 1000.0).round(4)

        self.out = pd.DataFrame({
            "fixing_i_pln_kwh": fi_pln_kwh.values,
            "fixing_ii_pln_kwh": fii_pln_kwh.values,
            "fixing_i_volume": pd.to_numeric(df.get("fixing_i_volume"), errors="coerce").values,
            "fixing_ii_volume": pd.to_numeric(df.get("fixing_ii_volume"), errors="coerce").values,
        }, index=df["date"]).sort_index()

        # sanity check — make sure the conversion produced some prices
        if self.out[["fixing_i_pln_kwh", "fixing_ii_pln_kwh"]].notna().sum().sum() == 0:
            raise RuntimeError("Brak cen po przeliczeniu — sprawdź separator/format liczb w CSV.")
        return self.out

    def fetch_day(self, business_day: date, tz: timezone | None = None) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """Return (ts_start_utc, ts_end_utc, price_pln_kwh, meta) points for one local day.

        FIX: ``tz`` is accepted because the base class calls
        ``fetch_day(business_day, WARSAW_TZ)``; the old one-argument signature
        raised TypeError via ingest_day. Day bounds use ``tz`` when given,
        else ``self.tz``.
        """
        if not hasattr(self, "out"):
            raise RuntimeError("Brak danych: najpierw wczytaj CSV i zbuduj self.out")

        # column selection by KIND (default: Fixing I)
        kind = getattr(self, "KIND", "fixing_I")
        kind_norm = str(kind).replace(" ", "_").lower()
        if "fixing_ii" in kind_norm:
            col = "fixing_ii_pln_kwh"
            fixing_tag = "II"
        else:
            col = "fixing_i_pln_kwh"
            fixing_tag = "I"

        day_tz = tz or self.tz
        day_start = datetime(business_day.year, business_day.month, business_day.day, 0, 0, tzinfo=day_tz)
        day_end = day_start + timedelta(days=1)

        df_day = self.out.loc[(self.out.index >= day_start) & (self.out.index < day_end)]
        if col not in df_day.columns:
            raise KeyError(f"Column '{col}' does not exist")

        points: List[Tuple[datetime, datetime, float, Dict[str, Any]]] = []
        for ts, price in df_day[col].dropna().items():
            ts_end = ts + getattr(self, "period", timedelta(hours=1))
            points.append((ts.to_pydatetime().astimezone(UTC), ts_end.to_pydatetime().astimezone(UTC), float(price),
                           {"source": "instrat_csv", "unit": "PLN/kWh", "fixing": fixing_tag, "taxes_included": False}))
        return points
|
||||
|
||||
99
Scraper/PSE_RCEScraper.py
Normal file
99
Scraper/PSE_RCEScraper.py
Normal file
@ -0,0 +1,99 @@
|
||||
from __future__ import annotations
|
||||
from datetime import datetime, timedelta, date, timezone
|
||||
from typing import List, Tuple, Dict, Any
|
||||
import requests
|
||||
|
||||
from EnergyPriceScraper import EnergyPriceScraperBase
|
||||
from utils.time_helpers import UTC, WARSAW_TZ
|
||||
|
||||
from logging_utils import HasLogger
|
||||
|
||||
|
||||
class PSE_RCEScraper(EnergyPriceScraperBase, HasLogger):
    """
    PSE RCE (PLN) — market settlement prices for a given business day,
    published in 15-minute slots (see PSE_date_range).

    Returns net PLN/kWh (the API publishes PLN/MWh, hence the /1000).
    """
    api_url: str = "https://api.raporty.pse.pl/api/rce-pln"
    session: requests.Session

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.PROVIDER = "PSE"
        self.KIND = "rce"
        self.SIDE = "sell"
        # NOTE(review): "reteiler" looks like a typo for "retailer", but it is
        # a DB identity value used in upsert/select keys — fix only together
        # with a data migration, never here alone.
        self.BUYER = "reteiler"
        self.SELLER = "prosumer"

        self.init_logger()

        self.session = requests.Session()
        self.session.headers.update({"accept": "application/json"})
        self.log.info("Initializing PSE RCE Done")

    def fetch_range(self, start_date: datetime, end_date: datetime) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """
        Fetch RCE points for the half-open UTC range [start_date, end_date).

        Both bounds must be tz-aware UTC datetimes exactly one day apart.

        Raises:
            ValueError: on a malformed or unsupported range.
            requests.HTTPError: when the PSE API responds with an error status.
        """
        # Explicit validation instead of assert — asserts vanish under `python -O`.
        if start_date.tzinfo is None or end_date.tzinfo is None:
            raise ValueError("start_date and end_date must be tz-aware")
        if start_date.tzinfo != UTC or end_date.tzinfo != UTC:
            raise ValueError("start_date and end_date must be in UTC")
        if not start_date < end_date:
            raise ValueError("start_date must precede end_date")
        if end_date - start_date != timedelta(days=1):
            # for now there is no way to import more than one day at a time
            raise ValueError("exactly one day must be requested")

        self.log.info(f"Fetching range: [{start_date}, {end_date}) UTC / [{start_date.astimezone(WARSAW_TZ)}, {end_date.astimezone(WARSAW_TZ)}) Europe/Warsaw")
        business_day = start_date.astimezone(WARSAW_TZ).date()
        self.log.debug(f"business_day: {business_day}")

        # RCE v2: filter by business_date, select rce_pln,dtime,period
        params = {
            "$select": "rce_pln,publication_ts_utc,dtime_utc,business_date",
            "$filter": f"business_date eq '{business_day:%Y-%m-%d}'",
        }
        r = self.session.get(self.api_url, params=params, timeout=30)
        r.raise_for_status()
        data = r.json().get("value", [])
        self.log.debug(f"Fetched data len: {len(data)} points")

        return [self.parse_pse_rce_record(item) for item in data]

    def fetch_day(self, business_day: date, tz: timezone) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """Fetch one whole local business day by converting its bounds to UTC."""
        start = datetime(
            year=business_day.year,
            month=business_day.month,
            day=business_day.day,
            hour=0,
            minute=0,
            second=0,
            tzinfo=tz
        ).astimezone(UTC)
        end = start + timedelta(days=1)
        return self.fetch_range(start, end)

    @staticmethod
    def PSE_date_range(dtime: datetime):
        """Derive the 15-minute slot (start, end) ending at *dtime*."""
        ts_end = dtime
        ts_start = dtime - timedelta(minutes=15)

        return ts_start, ts_end

    @staticmethod
    def parse_pse_rce_record(rec: dict):
        """Convert one raw API record into (ts_start, ts_end, price_pln_kwh, meta)."""
        # 'dtime_utc' marks the END of the timeslot, so the start is dtime - 15 min
        dtime_utc = datetime.strptime(rec["dtime_utc"], "%Y-%m-%d %H:%M:%S").replace(tzinfo=UTC)
        ts_start, ts_end = PSE_RCEScraper.PSE_date_range(dtime=dtime_utc)

        # PLN/MWh -> PLN/kWh
        price_pln_mwh = float(rec["rce_pln"])
        price_pln_kwh = price_pln_mwh / 1000.0

        meta = {
            "business_date": rec["business_date"],
            "source": "PSE_RCE",
            "publication_ts_utc": rec["publication_ts_utc"],
            "unit": "PLN/kWh",
            "taxes_included": False
        }
        return ts_start, ts_end, price_pln_kwh, meta
|
||||
121
Scraper/PstrykScraper.py
Normal file
121
Scraper/PstrykScraper.py
Normal file
@ -0,0 +1,121 @@
|
||||
from __future__ import annotations
|
||||
from datetime import datetime, timedelta, date, timezone
|
||||
from typing import List, Tuple, Dict, Any, Optional
|
||||
import os
|
||||
import requests
|
||||
from EnergyPriceScraper import EnergyPriceScraperBase
|
||||
|
||||
from logging_utils import HasLogger
|
||||
from utils.time_helpers import UTC, WARSAW_TZ
|
||||
|
||||
|
||||
class PstrykScraper(EnergyPriceScraperBase, HasLogger):
    """
    Prices published by the Pstryk retailer.

    Configuration comes from the environment:
        PSTRYK_API_BASE  e.g. https://api.pstryk.pl
        PSTRYK_TOKEN     API token (required; never commit it to source)
    Endpoint: GET /integrations/pricing?resolution=hour&window_start=...&window_end=...
        -> {'frames': [{'start': ..., 'end': ..., 'price_net': ..., ...}, ...]}
    """
    api_base: str
    token: str

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.PROVIDER = "PSTRYK"
        self.KIND = "market_price"
        self.SIDE = "buy"
        self.BUYER = "end_user"
        self.SELLER = "PSTRYK"

        self.init_logger()

        self.api_base = os.getenv("PSTRYK_API_BASE", "https://api.pstryk.pl/").rstrip("/")
        # SECURITY FIX: a live API token used to be hard-coded here as the
        # default. That credential is compromised (it was committed) and must
        # be rotated; the token now comes exclusively from the environment,
        # and a missing token fails fast below.
        self.token = os.getenv("PSTRYK_TOKEN")
        if not self.api_base or not self.token:
            raise RuntimeError("Ustaw PSTRYK_API_BASE i PSTRYK_TOKEN w środowisku.")
        self.session = requests.Session()
        self.session.headers.update({
            "accept": "application/json",
            # NOTE(review): the token is sent raw — confirm whether the API
            # expects a "Bearer " prefix.
            "Authorization": f"{self.token}",
            "user-agent": "energy-scraper/1.0",
        })

        self.log.debug("Initializing PSTRYK Done")

    def fetch_range(self, start_date: datetime, end_date: datetime) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """
        Fetch hourly price frames for the half-open UTC range [start_date, end_date).

        Raises:
            ValueError: when the bounds are naive, not UTC, or out of order.
            requests.HTTPError: when the API responds with an error status.
        """
        # Explicit validation instead of assert — asserts vanish under `python -O`.
        if start_date.tzinfo is None or end_date.tzinfo is None:
            raise ValueError("start_date and end_date must be tz-aware")
        if start_date.tzinfo != UTC or end_date.tzinfo != UTC:
            raise ValueError("start_date and end_date must be in UTC")
        if not start_date < end_date:
            raise ValueError("start_date must precede end_date")

        url = f"{self.api_base}/integrations/pricing"
        self.log.debug(f"Fetching url: {url}")
        self.log.info(f"Fetching range: [{start_date}, {end_date}) UTC / [{start_date.astimezone(WARSAW_TZ)}, {end_date.astimezone(WARSAW_TZ)}) Europe/Warsaw")
        r = self.session.get(url, params={
            "resolution": "hour",
            "window_start": start_date.strftime("%Y-%m-%dT%H:%M:%SZ"),
            "window_end": end_date.strftime("%Y-%m-%dT%H:%M:%SZ"),
        }, timeout=30)
        r.raise_for_status()
        data = r.json()

        out: List[Tuple[datetime, datetime, float, Dict[str, Any]]] = []
        self.log.debug(f"Fetched {len(data['frames'])} data frames for [{start_date}, {end_date}) UTC")
        for frame in data['frames']:
            row = self.parse_generic_price_frame(frame)
            if row is not None:
                out.append(row)
        return out

    def fetch_day(self, business_day: date, tz: timezone) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """Fetch one whole local business day by converting its bounds to UTC."""
        start = datetime(
            year=business_day.year,
            month=business_day.month,
            day=business_day.day,
            hour=0,
            minute=0,
            second=0,
            tzinfo=tz
        ).astimezone(UTC)

        end = start + timedelta(days=1)
        return self.fetch_range(start, end)

    def parse_generic_price_frame(self, rec: dict):
        """
        Input (example):
            {'start':'2025-09-01T00:00:00+00:00','end':'2025-09-01T01:00:00+00:00',
             'price_net':0.37,'price_gross':0.65,'is_cheap':True,'is_expensive':False}
        Output:
            (ts_start_utc, ts_end_utc, price_pln_kwh_net, meta), or None for
            frames missing the is_cheap/is_expensive flags (not yet valid).
        """
        if rec.get("is_cheap") is None or rec.get("is_expensive") is None:
            self.log.info(f"Ignoring non-valid price frame {rec}")
            return None

        try:
            ts_start = datetime.fromisoformat(rec["start"]).astimezone(UTC)
            ts_end = datetime.fromisoformat(rec["end"]).astimezone(UTC)
        except Exception as e:
            raise ValueError(f"Bad iso timeformat in 'start'/'end': {e}") from e

        if ts_end <= ts_start:
            raise ValueError(f"Bad range: start={ts_start.isoformat()} end={ts_end.isoformat()}")

        try:
            price_pln_kwh_net = float(rec["price_net"])
        except Exception as e:
            raise ValueError(f"Price net not available 'price_net': {e}") from e

        meta = {
            "unit": "PLN/kWh",
            "taxes_included": False,
            "is_cheap": bool(rec.get("is_cheap")),
            "is_expensive": bool(rec.get("is_expensive"))
        }
        return ts_start, ts_end, price_pln_kwh_net, meta
|
||||
1
Scraper/__init__.py
Normal file
1
Scraper/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# (left intentionally empty; factory imports modules dynamically)
|
||||
83
app.py
Normal file
83
app.py
Normal file
@ -0,0 +1,83 @@
|
||||
import os
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from zoneinfo import ZoneInfo
|
||||
from apscheduler.schedulers.blocking import BlockingScheduler
|
||||
from apscheduler.triggers.cron import CronTrigger
|
||||
|
||||
import EnergyPriceScraperFactory
|
||||
|
||||
from logging_setup import setup_logging
|
||||
from utils.time_helpers import WARSAW_TZ
|
||||
|
||||
from app_impl import hourly_tick
|
||||
from logging_utils import function_logger
|
||||
|
||||
setup_logging(logging.DEBUG)
|
||||
sched = BlockingScheduler(timezone=WARSAW_TZ)
|
||||
|
||||
def run_pstryk():
    """Scheduled job: run one PSTRYK ingestion tick; never let exceptions escape."""
    log = function_logger()
    log.info("Running PSTRYK")
    try:
        conn = EnergyPriceScraperFactory.setup_db()
        pstryk_scraper = EnergyPriceScraperFactory.create("Pstryk", conn=conn)
        # 11:30 and hourly safety check
        hourly_tick(pstryk_scraper)
    except Exception:
        # FIX: log.exception keeps the traceback, which log.error(f"...") dropped.
        log.exception("PSTRYK scraper failed")
|
||||
|
||||
|
||||
def run_PSE_RCE():
    """Scheduled job: run one PSE-RCE ingestion tick; never let exceptions escape."""
    log = function_logger()
    log.info("Running PSE-RCE")
    try:
        conn = EnergyPriceScraperFactory.setup_db()
        scraper = EnergyPriceScraperFactory.create("PSE_RCE", conn=conn)
        hourly_tick(scraper)
    except Exception:
        # FIX: log.exception keeps the traceback, which log.error(f"...") dropped.
        log.exception("PSE-RCE scraper failed")
|
||||
|
||||
|
||||
# Hourly triggers at minute 30 of every hour, local time (DST-safe)
sched.add_job(run_pstryk, 'cron', hour='*', minute='30')
|
||||
sched.add_job(run_PSE_RCE, 'cron', hour='*', minute='30')
|
||||
|
||||
sched.start()
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# setup_logging(logging.DEBUG)
|
||||
#
|
||||
# conn = EnergyPriceScraperFactory.setup_db()
|
||||
# # scraper = EnergyPriceScraperFactory.create("InstratRDN_CSV", conn=conn, path=path)
|
||||
# # scraper = EnergyPriceScraperFactory.create("PSE_RCE", conn=conn)
|
||||
# scraper = EnergyPriceScraperFactory.create("Pstryk", conn=conn)
|
||||
|
||||
# distribution_cost = DistributionCostFactory.create("TauronG13")
|
||||
# energy_price = DynamicPricesProvider.DynamicPricesProvider(PROVIDER="instrat", conn=conn, KIND="fixing I", SIDE="buy")
|
||||
#
|
||||
# start = datetime(2025, 6, 27, 0, 0) # lokalny czas PL (jeśli naive – funkcja sama doda WAW)
|
||||
#
|
||||
# rows = []
|
||||
# for h in range(24*3):
|
||||
# ts = start + timedelta(hours=h)
|
||||
# dist = distribution_cost.rate(ts)
|
||||
# price = energy_price.rate(ts)
|
||||
# pstryk_fee = 0.08
|
||||
#
|
||||
# rows.append({
|
||||
# "ts": ts,
|
||||
# "Dystrybucja (net)": dist,
|
||||
# "Energia (net)": price,
|
||||
# "Pstryk (net)" : pstryk_fee,
|
||||
# "Podatki": (distribution_cost.rate(ts) + price + pstryk_fee) * 1.23 - (distribution_cost.rate(ts) + price + pstryk_fee),
|
||||
# })
|
||||
#
|
||||
# fig, ax = plot_stacked_with_negatives(
|
||||
# rows,
|
||||
# title="Składowe ceny 1 kWh",
|
||||
# order=["Dystrybucja (net)", "Energia (net)", "Podatki"] # opcjonalnie
|
||||
# )
|
||||
#
|
||||
# plt.show()
|
||||
|
||||
88
app_impl.py
Normal file
88
app_impl.py
Normal file
@ -0,0 +1,88 @@
|
||||
from __future__ import annotations
|
||||
from datetime import datetime, date, time, timedelta, timezone
|
||||
from zoneinfo import ZoneInfo
|
||||
from typing import Sequence, Tuple, Optional
|
||||
|
||||
from logging_utils import function_logger
|
||||
from utils.time_helpers import WARSAW_TZ, UTC
|
||||
|
||||
def local_midnight(d: date, tz: ZoneInfo) -> datetime:
    """Return the tz-aware local midnight for calendar date *d*."""
    return datetime.combine(d, time.min, tzinfo=tz)
|
||||
|
||||
def end_of_business_day_utc(business_day: date, tz: ZoneInfo) -> datetime:
    """
    End of the half-open business day [start, end), expressed in UTC.

    For day D this is the local midnight of D+1 converted to UTC, which stays
    correct across 23h/25h DST transition days.
    """
    next_day = business_day + timedelta(days=1)
    return local_midnight(next_day, tz=tz).astimezone(UTC)
|
||||
|
||||
def is_day_fully_ingested(scraper, business_day: date, tz: ZoneInfo) -> bool:
    """
    Decide if *business_day* is fully present in the DB based on scraper.last_entry().

    Assumes that a fully ingested day ends at local midnight of D+1 in the DB.
    """
    log = function_logger()
    last: Optional[datetime] = scraper.last_entry()
    if not last:
        # FIX: was "scrapper"
        log.info("No last entry found for scraper")
        return False

    log.debug("Last entry: {}".format(last))
    # Explicit check instead of assert (stripped under -O): a naive timestamp
    # here would make the comparison below raise or silently mislead.
    if last.tzinfo is None:
        raise ValueError("last entry must be tz-aware")

    needed_end_utc = end_of_business_day_utc(business_day, tz)
    return last >= needed_end_utc
|
||||
|
||||
def maybe_ingest_for_next_day(
    scraper,
    now_local: datetime,
    tz: ZoneInfo
) -> int:
    """
    If tomorrow's prices are not in the DB yet, call ingest_day(tomorrow).
    Return the number of rows affected by the ingestion (0 when up to date).
    """
    log = function_logger()
    log.debug("Start")
    # Explicit check instead of assert — asserts vanish under `python -O`.
    if now_local.tzinfo is None:
        raise ValueError("now_local must be tz-aware (Europe/Warsaw).")

    today = now_local.date()
    # Publication happens today for the next business day
    target_day = today + timedelta(days=1)
    log.info("Target day = {}".format(target_day))

    if is_day_fully_ingested(scraper, target_day, tz):
        # FIX: was "happend"
        log.info("Publication already happened, we got the data already, skipping")
        return 0

    # Ingest the next business day
    log.info("Target day is NOT fully ingested, trying to read {}".format(target_day))
    ingested_rows = scraper.ingest_day(target_day)
    log.debug("Ingested rows = {}".format(ingested_rows))
    return ingested_rows
|
||||
|
||||
def hourly_tick(
    scraper
) -> int:
    """
    Run this once per hour.
    - Skips entirely before 11:00 local time (prices are published around midday).
    - Attempts ingestion for the next business day after the publication window.

    Returns:
        int: rows affected by the ingestion (0 when skipped or up to date).
        (FIX: the old docstring claimed a tuple was returned.)
    """
    log = function_logger()
    now_local = datetime.now(WARSAW_TZ)

    log.info(f"Hourly tick started for {now_local}")
    # FIX: now_local.time().hour built a throwaway time object; .hour is direct.
    if now_local.hour < 11:
        log.info("Hourly tick skipped, too early for update")
        return 0
    # Try to ingest the next business day after each configured window
    affected_next = maybe_ingest_for_next_day(scraper, now_local, WARSAW_TZ)
    log.info(f"Hourly tick ended for {now_local} with {affected_next} affected rows")
    return affected_next
|
||||
35
install.sh
Normal file
35
install.sh
Normal file
@ -0,0 +1,35 @@
|
||||
#!/usr/bin/env bash
# Bootstrap/refresh the energy-price-scrapers deployment under /opt and
# install/enable its systemd unit.
set -euo pipefail

# --- config ---
REPO_URL="${REPO_URL:-<PUT_YOUR_GIT_URL_HERE>}"
APP_DIR="/opt/energy-price-scrapers"

# --- clone or update repo ---
if [ ! -d "$APP_DIR/.git" ]; then
    git clone "$REPO_URL" "$APP_DIR"
else
    # Fast-forward only: refuse to silently rewrite local history.
    git -C "$APP_DIR" fetch --all --prune
    git -C "$APP_DIR" checkout main
    git -C "$APP_DIR" pull --ff-only
fi

# --- venv build/refresh ---
# Re-running `python3 -m venv` on an existing venv is a no-op refresh.
python3 -m venv "$APP_DIR/.venv"
"$APP_DIR/.venv/bin/pip" install --upgrade pip setuptools wheel
if [ -f "$APP_DIR/requirements.txt" ]; then
    "$APP_DIR/.venv/bin/pip" install -r "$APP_DIR/requirements.txt"
else
    echo "requirements.txt missing; aborting."
    exit 1
fi

# NOTE(review): chown runs without sudo while the install/systemctl commands
# below use sudo — confirm whether this script is meant to run as root.
chown -R energy:energy "$APP_DIR"
echo "Install complete."

sudo install -m 0755 $APP_DIR/os/energy-price-scrapers-update.sh /usr/local/bin/energy-price-scrapers-update
sudo install -m 0755 $APP_DIR/os/energy-price-scrapers.service /etc/systemd/system/energy-price-scrapers.service

sudo systemctl daemon-reload
sudo systemctl enable --now energy-price-scrapers.service
sudo systemctl status energy-price-scrapers.service
||||
52
logging_setup.py
Normal file
52
logging_setup.py
Normal file
@ -0,0 +1,52 @@
|
||||
import logging.config
|
||||
import logging, os, sys, platform
|
||||
try:
|
||||
# Only available on Linux with systemd
|
||||
from systemd.journal import JournalHandler # type: ignore
|
||||
HAS_JOURNAL = True
|
||||
except Exception:
|
||||
HAS_JOURNAL = False
|
||||
|
||||
|
||||
class MaxLevelFilter(logging.Filter):
    """Pass only records at or below a given level (inclusive)."""

    def __init__(self, level):
        super().__init__()
        self.level = level

    def filter(self, record):
        return record.levelno <= self.level
|
||||
|
||||
|
||||
class EnsureContext(logging.Filter):
    """Guarantee a 'context' attribute on every record so the formatter never KeyErrors."""

    def filter(self, record: logging.LogRecord) -> bool:
        # Keep an existing context untouched; default missing ones to "".
        record.context = getattr(record, "context", "")
        return True
|
||||
|
||||
def setup_logging(level: int = logging.INFO, syslog_id: str = "energy-scrapers") -> None:
    """Use journald if available; otherwise split stdout/stderr (Windows-friendly)."""
    root = logging.getLogger()
    root.handlers.clear()
    root.setLevel(level)

    fmt = logging.Formatter(
        "%(asctime)s %(levelname)s [%(name)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    use_journal = HAS_JOURNAL and platform.system() == "Linux"
    if use_journal:
        # Native journald handler (preserves levels/metadata); journald adds
        # its own timestamp/level, so only the message is formatted.
        journal_handler = JournalHandler(SYSLOG_IDENTIFIER=syslog_id)
        journal_handler.setFormatter(logging.Formatter("%(message)s"))
        root.addHandler(journal_handler)
    else:
        # Portable fallback: INFO and below -> stdout, WARNING+ -> stderr
        stdout_handler = logging.StreamHandler(sys.stdout)
        stdout_handler.setLevel(logging.DEBUG)
        stdout_handler.addFilter(MaxLevelFilter(logging.INFO))
        stdout_handler.setFormatter(fmt)

        stderr_handler = logging.StreamHandler(sys.stderr)
        stderr_handler.setLevel(logging.WARNING)
        stderr_handler.setFormatter(fmt)

        root.addHandler(stdout_handler)
        root.addHandler(stderr_handler)

    # Make sure Python doesn't buffer output (useful on Windows/services)
    os.environ.setdefault("PYTHONUNBUFFERED", "1")
|
||||
59
logging_utils.py
Normal file
59
logging_utils.py
Normal file
@ -0,0 +1,59 @@
|
||||
import logging
|
||||
from contextlib import contextmanager
|
||||
import inspect
|
||||
|
||||
def _fmt_ctx(ctx: dict) -> str:
    # Render the context as one bracketed block for the formatter: "[k=v a=b] "
    if not ctx:
        return ""
    pairs = [f"{key}={value}" for key, value in ctx.items() if value is not None]
    return "[" + " ".join(pairs) + "] "
|
||||
|
||||
|
||||
class HasLogger:
    """Mixin providing a context-aware LoggerAdapter via explicit init_logger().

    NOTE(review): the original docstring claimed "No __init__, no MRO
    dependency", but an __init__ is defined below — cooperative
    super().__init__() chains should take it into account.
    """

    def __init__(self):
        # Placeholders only; the real adapter is built in init_logger().
        self.log = None
        self._log_ctx = None
        self._base_logger = None

    def init_logger(self, *, context: dict | None = None, name: str | None = None) -> None:
        """Initialize the logger explicitly; call from your class __init__."""
        # Default logger name is the concrete subclass name.
        base_name = name or self.__class__.__name__
        self._base_logger = logging.getLogger(base_name)
        self._log_ctx: dict = dict(context or {})
        self.log = logging.LoggerAdapter(self._base_logger, {"context": _fmt_ctx(self._log_ctx)})

    def set_logger_context(self, **context) -> None:
        """Persistently merge new context into this instance and refresh adapter."""
        if not hasattr(self, "_base_logger"):  # safety if init_logger was forgotten
            self.init_logger()
        # None values are dropped rather than stored.
        self._log_ctx.update({k: v for k, v in context.items() if v is not None})
        self.log = logging.LoggerAdapter(self._base_logger, {"context": _fmt_ctx(self._log_ctx)})

    def child_logger(self, **extra) -> logging.LoggerAdapter:
        """Temporary adapter with additional context (does not mutate instance context)."""
        if not hasattr(self, "_base_logger"):
            self.init_logger()
        merged = self._log_ctx.copy()
        merged.update({k: v for k, v in extra.items() if v is not None})
        return logging.LoggerAdapter(self._base_logger, {"context": _fmt_ctx(merged)})

    @contextmanager
    def scoped_log(self, **extra):
        """Context manager yielding a temporary adapter with extra context."""
        yield self.child_logger(**extra)
|
||||
|
||||
|
||||
def function_logger(name: str | None = None, **context):
    """Return a LoggerAdapter that follows the HasLogger format for free functions.

    When *name* is omitted it is derived from the caller's frame as
    ``"<module>.<function>"``.
    """
    if name is None:
        caller = inspect.currentframe().f_back  # the frame that called us
        caller_func = caller.f_code.co_name
        caller_mod = caller.f_globals.get("__name__", "app")
        name = f"{caller_mod}.{caller_func}"
    holder = HasLogger()
    holder.init_logger(name=name, context=context)
    return holder.log
|
||||
16
os/energy-price-scrapers-update.sh
Normal file
16
os/energy-price-scrapers-update.sh
Normal file
@ -0,0 +1,16 @@
|
||||
#!/usr/bin/env bash
# Update /opt/energy-price-scrapers from git, refresh venv dependencies,
# and restart the systemd service. Must be run with sudo privileges.
set -euo pipefail

APP_DIR="/opt/energy-price-scrapers"

# All repo/venv operations run as the unprivileged "energy" user.
as_energy() { sudo -u energy "$@"; }

as_energy git -C "$APP_DIR" fetch --all --prune
as_energy git -C "$APP_DIR" checkout main
as_energy git -C "$APP_DIR" pull --ff-only

# upgrade deps if changed
as_energy "$APP_DIR/.venv/bin/pip" install --upgrade pip
if [ -f "$APP_DIR/requirements.txt" ]; then
    as_energy "$APP_DIR/.venv/bin/pip" install -r "$APP_DIR/requirements.txt"
fi

sudo systemctl restart energy-price-scrapers.service
echo "Updated & restarted."
|
||||
23
os/energy-price-scrapers.service
Normal file
23
os/energy-price-scrapers.service
Normal file
@ -0,0 +1,23 @@
|
||||
# systemd unit for the energy price scraper scheduler.
# Install to /etc/systemd/system/ and enable with:
#   systemctl enable --now energy-price-scrapers.service
[Unit]
Description=Energy price scrapers (APScheduler)
# Start only once the network is actually usable (the scrapers need outbound HTTP).
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
# Run as the dedicated unprivileged service account.
User=energy
Group=energy
WorkingDirectory=/opt/energy-price-scrapers
# Environment (DB credentials etc.) lives outside the repo checkout.
EnvironmentFile=/etc/energy-price-scrapers.env
# Use the venv python to run your scheduler app
ExecStart=/opt/energy-price-scrapers/.venv/bin/python /opt/energy-price-scrapers/app.py
# Restart on any exit, with a short back-off.
Restart=always
RestartSec=5
# Hardening (tune as needed)
NoNewPrivileges=true
PrivateTmp=true
ProtectHome=true
ProtectSystem=full

[Install]
WantedBy=multi-user.target
|
||||
107
plot_cost_breakdown.py
Normal file
107
plot_cost_breakdown.py
Normal file
@ -0,0 +1,107 @@
|
||||
from __future__ import annotations
|
||||
from zoneinfo import ZoneInfo
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
from typing import List, Dict, Sequence, Optional
|
||||
from datetime import datetime
|
||||
import math
|
||||
|
||||
WAW = ZoneInfo("Europe/Warsaw")
|
||||
|
||||
def plot_stacked_with_negatives(
    rows: List[Dict[str, float]],
    *,
    x_key: str = "ts",  # key naming the X-axis field (e.g. datetime or str)
    order: Optional[Sequence[str]] = None,  # layer order; defaults to a heuristic
    title: Optional[str] = None,
    ylabel: str = "PLN/kWh",
    sum_label: str = "Cena brutto",
    save_path: Optional[str] = None,
):
    """
    Draw a stacked bar chart with support for negative components.

    Each element of `rows` is a dict:
    { x_key: <datetime|str>, "Component name 1": val1, "Component name 2": val2, ... }
    The function performs NO domain calculations - it only draws what it receives.
    - Positive values are stacked upward, negative ones downward (separate stacks).
    - The `sum_label` line is the sum of all components (excluding x_key) per bar.

    Returns the matplotlib ``(fig, ax)`` pair; the figure is also saved to
    *save_path* (150 dpi) when one is given.
    """
    if not rows:
        raise ValueError("Brak danych: 'rows' jest puste.")

    # 1) X-axis labels (datetimes get a two-line "date\ntime" label)
    labels = []
    for r in rows:
        if x_key not in r:
            raise KeyError(f"Brak klucza '{x_key}' w wierszu: {r}")
        xv = r[x_key]
        labels.append(xv.strftime("%Y-%m-%d\n%H:%M") if isinstance(xv, datetime) else str(xv))

    # 2) list of keys (layers), in first-seen order across all rows
    all_keys = []
    for r in rows:
        for k in r.keys():
            if k == x_key:
                continue
            if k not in all_keys:
                all_keys.append(k)

    if order:
        # Caller-specified order first; any keys not mentioned keep discovery order.
        ordered = [k for k in order if k in all_keys] + [k for k in all_keys if k not in order]
    else:
        # Heuristic: well-known cost components first, the rest alphabetically.
        prefer = ["Dystrybucja (net)", "Energia (net)", "Podatki"]
        preferred = [k for k in prefer if k in all_keys]
        rest = sorted([k for k in all_keys if k not in preferred])
        ordered = preferred + rest

    # 3) value matrices (a key missing from a row contributes 0.0)
    values_by_key = {k: [float(r.get(k, 0.0)) for r in rows] for k in ordered}
    n = len(rows)
    x = list(range(n))

    # 4) draw - two stacks: positive (bottom_pos) and negative (bottom_neg)
    fig, ax = plt.subplots(figsize=(max(8, n * 0.35), 5))
    bottom_pos = [0.0] * n
    bottom_neg = [0.0] * n
    legend_done = set()

    for k in ordered:
        vals = values_by_key[k]
        pos_vals = [v if v > 0 else 0.0 for v in vals]
        neg_vals = [v if v < 0 else 0.0 for v in vals]

        # decide where to attach the legend label (only once per key)
        label_to_use = None
        if k not in legend_done and (any(v != 0 for v in vals)):
            label_to_use = k
            legend_done.add(k)

        # positive part
        if any(pos_vals):
            ax.bar(x, pos_vals, bottom=bottom_pos, label=label_to_use)
            bottom_pos = [b + v for b, v in zip(bottom_pos, pos_vals)]
            label_to_use = None  # label already used

        # negative part (negative height + bottom = cumulative negatives)
        if any(neg_vals):
            ax.bar(x, neg_vals, bottom=bottom_neg, label=label_to_use)
            bottom_neg = [b + v for b, v in zip(bottom_neg, neg_vals)]
            label_to_use = None

    # 5) total (gross) line: sum of all components in each bar
    y_sum = [sum(values_by_key[k][i] for k in ordered) for i in range(n)]
    ax.plot(x, y_sum, label=sum_label)

    # 6) X axis - vertical labels, thinned to at most ~24 ticks
    # NOTE: set_xticks(ticks, labels, **text_kwargs) requires matplotlib >= 3.5.
    step = 1 if n <= 24 else math.ceil(n / 24)
    ax.set_xticks(x[::step], [labels[i] for i in range(0, n, step)], rotation=90, ha="center")

    ax.set_ylabel(ylabel)
    ax.set_xlabel("Okres")
    if title:
        ax.set_title(title)
    ax.legend()
    ax.grid(axis="y", linestyle="--", alpha=0.4)
    fig.tight_layout()
    if save_path:
        fig.savefig(save_path, dpi=150)
    return fig, ax
|
||||
6
requirements.txt
Normal file
6
requirements.txt
Normal file
@ -0,0 +1,6 @@
|
||||
psycopg[binary]>=3.1,<3.3
|
||||
pandas~=2.3.2
|
||||
requests>=2.32,<3.0
|
||||
apscheduler>=3.10,<4.0
|
||||
|
||||
systemd-python>=235; sys_platform != "win32"
|
||||
44
utils/calendar_pl.py
Normal file
44
utils/calendar_pl.py
Normal file
@ -0,0 +1,44 @@
|
||||
from __future__ import annotations
|
||||
from datetime import date, timedelta, datetime
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
def gov_energy_prices_shield(ts: datetime) -> bool:
    """Return True if *ts* falls inside the government energy-price shield window.

    The window is 2024-01-01 00:00 through 2025-09-30 23:59:59.999999,
    Europe/Warsaw local time. Naive datetimes are assumed to already be
    Warsaw local time; aware ones are converted.
    """
    warsaw = ZoneInfo("Europe/Warsaw")
    shield_start = datetime(2024, 1, 1, 0, 0, tzinfo=warsaw)
    shield_end = datetime(2025, 9, 30, 23, 59, 59, 999999, tzinfo=warsaw)

    local_ts = ts.astimezone(warsaw) if ts.tzinfo else ts.replace(tzinfo=warsaw)
    return shield_start <= local_ts <= shield_end
|
||||
|
||||
def _easter_sunday(year: int) -> date:
    """Return the date of (Western, Gregorian) Easter Sunday for *year*.

    Uses the Meeus/Jones/Butcher anonymous Gregorian computus; the
    single-letter names follow the published algorithm.
    """
    # Meeus/Jones/Butcher
    a = year % 19      # position in the 19-year Metonic cycle
    b = year // 100    # century
    c = year % 100     # year within the century
    d = b // 4
    e = b % 4
    f = (b + 8) // 25
    g = (b - f + 1) // 3
    h = (19*a + b - d - g + 15) % 30   # epact-related term
    i = c // 4
    k = c % 4
    l = (32 + 2*e + 2*i - h - k) % 7   # weekday correction
    m = (a + 11*h + 22*l) // 451
    month = (h + l - 7*m + 114) // 31  # 3 = March, 4 = April
    day = 1 + ((h + l - 7*m + 114) % 31)
    return date(year, month, day)
|
||||
|
||||
def polish_holidays(year: int) -> set[date]:
    """Return all Polish statutory public holidays for *year* as a set of dates."""
    easter = _easter_sunday(year)
    holidays = {
        date(year, 1, 1), date(year, 1, 6),
        date(year, 5, 1), date(year, 5, 3),
        date(year, 8, 15), date(year, 11, 1), date(year, 11, 11),
        date(year, 12, 25), date(year, 12, 26),
        easter, easter + timedelta(days=1),
        easter + timedelta(days=49),  # Pentecost
        easter + timedelta(days=60),  # Corpus Christi
    }
    # Christmas Eve became a statutory public holiday in Poland starting in
    # 2025 (2024 amendment to the public-holidays act) — without it, tariff
    # windows on Dec 24 would be misclassified as working days.
    if year >= 2025:
        holidays.add(date(year, 12, 24))
    return holidays
|
||||
|
||||
def is_weekend_or_holiday(d: date) -> bool:
    """True for Saturdays, Sundays, and Polish public holidays."""
    if d.weekday() >= 5:  # 5 = Saturday, 6 = Sunday
        return True
    return d in polish_holidays(d.year)
|
||||
15
utils/time_helpers.py
Normal file
15
utils/time_helpers.py
Normal file
@ -0,0 +1,15 @@
|
||||
from __future__ import annotations
|
||||
from datetime import time
|
||||
from typing import Tuple
|
||||
import zoneinfo
|
||||
|
||||
WARSAW_TZ = zoneinfo.ZoneInfo("Europe/Warsaw")
|
||||
UTC = zoneinfo.ZoneInfo("UTC")
|
||||
TimeRange = Tuple[time, time]
|
||||
|
||||
def in_range_local(t_local: time, rng: TimeRange) -> bool:
    """Half-open membership test ``[start, end)`` in local wall-clock time.

    Supports ranges that cross midnight (start > end), e.g. 22:00-06:00.
    """
    start, end = rng
    if start > end:
        # Wraps past midnight: match the late-evening OR early-morning slice.
        return t_local >= start or t_local < end
    return start <= t_local < end
|
||||
Loading…
Reference in New Issue
Block a user