This commit is contained in:
Bartosz Wieczorek 2025-09-03 10:58:40 +02:00
parent 572c301f2f
commit afbe6b564a
15 changed files with 114 additions and 190 deletions

View File

@ -6,5 +6,7 @@
</content>
<orderEntry type="jdk" jdkName="Python 3.13 (entsoe_scrapper)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="module" module-name="ranczo-energy-usage-scraper" />
<orderEntry type="module" module-name="ranczo-energy-costs" />
</component>
</module>

View File

@ -3,6 +3,8 @@
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/energy_price_scraper.iml" filepath="$PROJECT_DIR$/.idea/energy_price_scraper.iml" />
<module fileurl="file://$PROJECT_DIR$/../ranczo-energy-costs/.idea/ranczo-energy-costs.iml" filepath="$PROJECT_DIR$/../ranczo-energy-costs/.idea/ranczo-energy-costs.iml" />
<module fileurl="file://$PROJECT_DIR$/../ranczo-energy-usage-scraper/.idea/ranczo-energy-usage-scraper.iml" filepath="$PROJECT_DIR$/../ranczo-energy-usage-scraper/.idea/ranczo-energy-usage-scraper.iml" />
</modules>
</component>
</project>

View File

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
<mapping directory="" vcs="Git" />
</component>
</project>

View File

@ -5,7 +5,7 @@ from typing import Iterable, List, Tuple, Dict, Any, Optional
from zoneinfo import ZoneInfo
import json
import psycopg
from utils.time_helpers import WARSAW_TZ
from utils.time import WARSAW_TZ
IntervalRow = Tuple[datetime, datetime, float, str, str, str, str, str, str] # patrz _rows_to_upsert

View File

@ -1,19 +0,0 @@
sudo apt update
sudo apt install -y python3-venv python3-pip git tzdata
# system user without shell login
sudo useradd -r -s /usr/sbin/nologin -d /opt/energy-scrapers energy
copy service to
/etc/systemd/system/energy-price-scrapers.service
sudo install -m 0755 os/energy-price-scrapers-update.sh /usr/local/bin/energy-scrapers-update
as root
systemctl daemon-reload
systemctl enable --now energy-price-scrapers.service
systemctl status energy-price-scrapers.service
~~~~
logi
journalctl -u energy-price-scrapers.service -f

View File

@ -3,7 +3,7 @@ from datetime import datetime, timedelta, date
from typing import List, Tuple, Dict, Any
import pandas as pd
from EnergyPriceScraper import EnergyPriceScraperBase
from utils.time_helpers import WARSAW_TZ, UTC
from utils.time import WARSAW_TZ, UTC
# CSV pobrane z https://energy.instrat.pl/ceny/energia-rdn-godzinowe/
class InstratRDN_CSVScraper(EnergyPriceScraperBase):

View File

@ -4,9 +4,8 @@ from typing import List, Tuple, Dict, Any
import requests
from EnergyPriceScraper import EnergyPriceScraperBase
from utils.time_helpers import UTC, WARSAW_TZ
from logging_utils import HasLogger
from utils.time import UTC, WARSAW_TZ
from utils.logging import HasLogger
class PSE_RCEScraper(EnergyPriceScraperBase, HasLogger):
@ -61,15 +60,7 @@ class PSE_RCEScraper(EnergyPriceScraperBase, HasLogger):
return out
def fetch_day(self, business_day: date, tz: timezone) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
start = datetime(
year=business_day.year,
month=business_day.month,
day=business_day.day,
hour=0,
minute=0,
second=0,
tzinfo=tz
).astimezone(UTC)
start = datetime.combine(business_day, datetime.min.time(), tzinfo=tz).astimezone(UTC)
end = start + timedelta(days=1)
return self.fetch_range(start, end)

View File

@ -1,12 +1,13 @@
from __future__ import annotations
from datetime import datetime, timedelta, date, timezone
from typing import List, Tuple, Dict, Any, Optional
from typing import List, Tuple, Dict, Any
import os
import requests
from EnergyPriceScraper import EnergyPriceScraperBase
from logging_utils import HasLogger
from utils.time_helpers import UTC, WARSAW_TZ
from utils.logging import HasLogger
from utils.time import UTC, WARSAW_TZ
class PstrykScraper(EnergyPriceScraperBase, HasLogger):
@ -72,15 +73,7 @@ class PstrykScraper(EnergyPriceScraperBase, HasLogger):
return out
def fetch_day(self, business_day: date, tz: timezone) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
start = datetime(
year=business_day.year,
month=business_day.month,
day=business_day.day,
hour=0,
minute=0,
second=0,
tzinfo=tz
).astimezone(UTC)
start = datetime.combine(business_day, datetime.min.time(), tzinfo=tz).astimezone(UTC)
end = start + timedelta(days=1)
return self.fetch_range(start, end)

74
app.py
View File

@ -1,83 +1,39 @@
import os
import logging
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo
from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.triggers.cron import CronTrigger
import EnergyPriceScraperFactory
from logging_setup import setup_logging
from utils.time_helpers import WARSAW_TZ
from utils.logging import setup_logging
from utils.time import WARSAW_TZ
from app_impl import hourly_tick
from logging_utils import function_logger
from utils.logging import function_logger
setup_logging(logging.DEBUG)
sched = BlockingScheduler(timezone=WARSAW_TZ)
def run_pstryk():
def run(name: str):
log = function_logger()
log.info("Running PSTRYK")
log.info(f"Running {name}")
try:
conn = EnergyPriceScraperFactory.setup_db()
pstryk_scraper = EnergyPriceScraperFactory.create("Pstryk", conn=conn)
pstryk_scraper = EnergyPriceScraperFactory.create(name, conn=conn)
# 11:30 and hourly safety check
hourly_tick(pstryk_scraper)
except Exception as e:
log.error(f"PSTRYK throw an exception: {e}")
log.error(f"{name} throw an exception: {e}")
def run_pstryk():
run("Pstryk")
def run_PSE_RCE():
log = function_logger()
log.info("Running PSE-RCE")
try:
conn = EnergyPriceScraperFactory.setup_db()
scraper = EnergyPriceScraperFactory.create("PSE_RCE", conn=conn)
hourly_tick(scraper)
except Exception as e:
log.error(f"PSE-RCE throw an exception: {e}")
run("PSE_RCE")
# Daily triggers at exact local times (DST-safe)
sched.add_job(run_pstryk, 'cron', hour='*', minute='30')
sched.add_job(run_PSE_RCE, 'cron', hour='*', minute='30')
sched.start()
# if __name__ == "__main__":
# setup_logging(logging.DEBUG)
#
# conn = EnergyPriceScraperFactory.setup_db()
# # scraper = EnergyPriceScraperFactory.create("InstratRDN_CSV", conn=conn, path=path)
# # scraper = EnergyPriceScraperFactory.create("PSE_RCE", conn=conn)
# scraper = EnergyPriceScraperFactory.create("Pstryk", conn=conn)
# distribution_cost = DistributionCostFactory.create("TauronG13")
# energy_price = DynamicPricesProvider.DynamicPricesProvider(PROVIDER="instrat", conn=conn, KIND="fixing I", SIDE="buy")
#
# start = datetime(2025, 6, 27, 0, 0) # lokalny czas PL (jeśli naive funkcja sama doda WAW)
#
# rows = []
# for h in range(24*3):
# ts = start + timedelta(hours=h)
# dist = distribution_cost.rate(ts)
# price = energy_price.rate(ts)
# pstryk_fee = 0.08
#
# rows.append({
# "ts": ts,
# "Dystrybucja (net)": dist,
# "Energia (net)": price,
# "Pstryk (net)" : pstryk_fee,
# "Podatki": (distribution_cost.rate(ts) + price + pstryk_fee) * 1.23 - (distribution_cost.rate(ts) + price + pstryk_fee),
# })
#
# fig, ax = plot_stacked_with_negatives(
# rows,
# title="Składowe ceny 1 kWh",
# order=["Dystrybucja (net)", "Energia (net)", "Podatki"] # opcjonalnie
# )
#
# plt.show()
sched.add_job(run_pstryk, 'cron', hour='*', minute='0')
sched.add_job(run_PSE_RCE, 'cron', hour='*', minute='0')
sched.start()

View File

@ -1,23 +1,11 @@
from __future__ import annotations
from datetime import datetime, date, time, timedelta, timezone
from datetime import datetime, date, timedelta
from zoneinfo import ZoneInfo
from typing import Sequence, Tuple, Optional
from typing import Optional
from logging_utils import function_logger
from utils.time_helpers import WARSAW_TZ, UTC
from utils.logging import function_logger
from utils.time import WARSAW_TZ, end_of_business_day_utc
def local_midnight(d: date, tz: ZoneInfo) -> datetime:
"""Return local midnight (tz-aware) for a given calendar date."""
return datetime(d.year, d.month, d.day, 0, 0, tzinfo=tz)
def end_of_business_day_utc(business_day: date, tz: ZoneInfo) -> datetime:
"""
End of the business day [start, end) expressed in UTC.
For day D this is local midnight of D+1 converted to UTC.
Correct across 23h/25h DST days.
"""
end_local = local_midnight(business_day + timedelta(days=1), tz=tz)
return end_local.astimezone(UTC)
def is_day_fully_ingested(scraper, business_day: date, tz: ZoneInfo) -> bool:
"""

View File

@ -1,53 +0,0 @@
import logging
import logging.config
import logging, os, sys, platform
try:
# Only available on Linux with systemd
from systemd.journal import JournalHandler # type: ignore
HAS_JOURNAL = True
except Exception:
HAS_JOURNAL = False
class MaxLevelFilter(logging.Filter):
"""Allow records up to a certain level (inclusive)."""
def __init__(self, level): super().__init__(); self.level = level
def filter(self, record): return record.levelno <= self.level
class EnsureContext(logging.Filter):
"""Always provide 'context' so the formatter never KeyErrors."""
def filter(self, record: logging.LogRecord) -> bool:
if not hasattr(record, "context"):
record.context = ""
return True
def setup_logging(level: int = logging.INFO, syslog_id: str = "energy-scrapers") -> None:
"""Use journald if available; otherwise split stdout/stderr (Windows-friendly)."""
root = logging.getLogger()
root.handlers.clear()
root.setLevel(level)
fmt = logging.Formatter("%(asctime)s %(levelname)s [%(name)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
if HAS_JOURNAL and platform.system() == "Linux":
# Native journald handler (preserves levels/metadata)
h = JournalHandler(SYSLOG_IDENTIFIER=syslog_id)
h.setFormatter(logging.Formatter("%(message)s")) # journald adds timestamp/level
root.addHandler(h)
else:
# Portable fallback: INFO and below -> stdout, WARNING+ -> stderr
h_out = logging.StreamHandler(sys.stdout)
h_out.setLevel(logging.DEBUG)
h_out.addFilter(MaxLevelFilter(logging.INFO))
h_out.setFormatter(fmt)
h_err = logging.StreamHandler(sys.stderr)
h_err.setLevel(logging.WARNING)
h_err.setFormatter(fmt)
root.addHandler(h_out)
root.addHandler(h_err)
# Optional: make sure Python doesnt buffer output (useful on Windows/services)
os.environ.setdefault("PYTHONUNBUFFERED", "1")

View File

@ -1,6 +1,57 @@
import logging
from contextlib import contextmanager
import inspect
import logging.config
import logging, os, sys, platform
try:
# Only available on Linux with systemd
from systemd.journal import JournalHandler # type: ignore
HAS_JOURNAL = True
except Exception:
HAS_JOURNAL = False
class MaxLevelFilter(logging.Filter):
"""Allow records up to a certain level (inclusive)."""
def __init__(self, level): super().__init__(); self.level = level
def filter(self, record): return record.levelno <= self.level
class EnsureContext(logging.Filter):
"""Always provide 'context' so the formatter never KeyErrors."""
def filter(self, record: logging.LogRecord) -> bool:
if not hasattr(record, "context"):
record.context = ""
return True
def setup_logging(level: int = logging.INFO, syslog_id: str = "energy-scrapers") -> None:
"""Use journald if available; otherwise split stdout/stderr (Windows-friendly)."""
root = logging.getLogger()
root.handlers.clear()
root.setLevel(level)
fmt = logging.Formatter("%(asctime)s %(levelname)s [%(name)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
if HAS_JOURNAL and platform.system() == "Linux":
# Native journald handler (preserves levels/metadata)
h = JournalHandler(SYSLOG_IDENTIFIER=syslog_id)
h.setFormatter(logging.Formatter("%(message)s")) # journald adds timestamp/level
root.addHandler(h)
else:
# Portable fallback: INFO and below -> stdout, WARNING+ -> stderr
h_out = logging.StreamHandler(sys.stdout)
h_out.setLevel(logging.DEBUG)
h_out.addFilter(MaxLevelFilter(logging.INFO))
h_out.setFormatter(fmt)
h_err = logging.StreamHandler(sys.stderr)
h_err.setLevel(logging.WARNING)
h_err.setFormatter(fmt)
root.addHandler(h_out)
root.addHandler(h_err)
# Optional: make sure Python doesnt buffer output (useful on Windows/services)
os.environ.setdefault("PYTHONUNBUFFERED", "1")
def _fmt_ctx(ctx: dict) -> str:
# Build a single bracketed context block for the formatter: "[k=v a=b] "

28
utils/time.py Normal file
View File

@ -0,0 +1,28 @@
from __future__ import annotations
from datetime import time, date, datetime, timedelta
from typing import Tuple
from zoneinfo import ZoneInfo
WARSAW_TZ = ZoneInfo("Europe/Warsaw")
UTC = ZoneInfo("UTC")
TimeRange = Tuple[time, time]
def in_range_local(t_local: time, rng: TimeRange) -> bool:
"""[start, end) in local time, supports ranges crossing midnight."""
start, end = rng
if start <= end:
return start <= t_local < end
return (t_local >= start) or (t_local < end)
def local_midnight(d: date, tz: ZoneInfo) -> datetime:
"""Return local midnight (tz-aware) for a given calendar date."""
return datetime(d.year, d.month, d.day, 0, 0, tzinfo=tz)
def end_of_business_day_utc(business_day: date, tz: ZoneInfo) -> datetime:
"""
End of the business day [start, end) expressed in UTC.
For day D this is local midnight of D+1 converted to UTC.
Correct across 23h/25h DST days.
"""
end_local = local_midnight(business_day + timedelta(days=1), tz=tz)
return end_local.astimezone(UTC)

View File

@ -1,15 +0,0 @@
from __future__ import annotations
from datetime import time
from typing import Tuple
import zoneinfo
WARSAW_TZ = zoneinfo.ZoneInfo("Europe/Warsaw")
UTC = zoneinfo.ZoneInfo("UTC")
TimeRange = Tuple[time, time]
def in_range_local(t_local: time, rng: TimeRange) -> bool:
"""[start, end) in local time, supports ranges crossing midnight."""
start, end = rng
if start <= end:
return start <= t_local < end
return (t_local >= start) or (t_local < end)