refactor
This commit is contained in:
parent
572c301f2f
commit
afbe6b564a
@ -6,5 +6,7 @@
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Python 3.13 (entsoe_scrapper)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
<orderEntry type="module" module-name="ranczo-energy-usage-scraper" />
|
||||
<orderEntry type="module" module-name="ranczo-energy-costs" />
|
||||
</component>
|
||||
</module>
|
||||
@ -3,6 +3,8 @@
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/energy_price_scraper.iml" filepath="$PROJECT_DIR$/.idea/energy_price_scraper.iml" />
|
||||
<module fileurl="file://$PROJECT_DIR$/../ranczo-energy-costs/.idea/ranczo-energy-costs.iml" filepath="$PROJECT_DIR$/../ranczo-energy-costs/.idea/ranczo-energy-costs.iml" />
|
||||
<module fileurl="file://$PROJECT_DIR$/../ranczo-energy-usage-scraper/.idea/ranczo-energy-usage-scraper.iml" filepath="$PROJECT_DIR$/../ranczo-energy-usage-scraper/.idea/ranczo-energy-usage-scraper.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
||||
@ -1,6 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
<mapping directory="" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
||||
@ -5,7 +5,7 @@ from typing import Iterable, List, Tuple, Dict, Any, Optional
|
||||
from zoneinfo import ZoneInfo
|
||||
import json
|
||||
import psycopg
|
||||
from utils.time_helpers import WARSAW_TZ
|
||||
from utils.time import WARSAW_TZ
|
||||
|
||||
IntervalRow = Tuple[datetime, datetime, float, str, str, str, str, str, str] # patrz _rows_to_upsert
|
||||
|
||||
|
||||
19
README.md
19
README.md
@ -1,19 +0,0 @@
|
||||
sudo apt update
|
||||
sudo apt install -y python3-venv python3-pip git tzdata
|
||||
|
||||
# system user without shell login
|
||||
sudo useradd -r -s /usr/sbin/nologin -d /opt/energy-scrapers energy
|
||||
|
||||
copy service to
|
||||
/etc/systemd/system/energy-price-scrapers.service
|
||||
|
||||
sudo install -m 0755 os/energy-price-scrapers-update.sh /usr/local/bin/energy-scrapers-update
|
||||
|
||||
as root
|
||||
|
||||
systemctl daemon-reload
|
||||
systemctl enable --now energy-price-scrapers.service
|
||||
systemctl status energy-price-scrapers.service
|
||||
~~~~
|
||||
logi
|
||||
journalctl -u energy-price-scrapers.service -f
|
||||
@ -3,7 +3,7 @@ from datetime import datetime, timedelta, date
|
||||
from typing import List, Tuple, Dict, Any
|
||||
import pandas as pd
|
||||
from EnergyPriceScraper import EnergyPriceScraperBase
|
||||
from utils.time_helpers import WARSAW_TZ, UTC
|
||||
from utils.time import WARSAW_TZ, UTC
|
||||
|
||||
# CSV pobrane z https://energy.instrat.pl/ceny/energia-rdn-godzinowe/
|
||||
class InstratRDN_CSVScraper(EnergyPriceScraperBase):
|
||||
|
||||
@ -4,9 +4,8 @@ from typing import List, Tuple, Dict, Any
|
||||
import requests
|
||||
|
||||
from EnergyPriceScraper import EnergyPriceScraperBase
|
||||
from utils.time_helpers import UTC, WARSAW_TZ
|
||||
|
||||
from logging_utils import HasLogger
|
||||
from utils.time import UTC, WARSAW_TZ
|
||||
from utils.logging import HasLogger
|
||||
|
||||
|
||||
class PSE_RCEScraper(EnergyPriceScraperBase, HasLogger):
|
||||
@ -61,15 +60,7 @@ class PSE_RCEScraper(EnergyPriceScraperBase, HasLogger):
|
||||
return out
|
||||
|
||||
def fetch_day(self, business_day: date, tz: timezone) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
|
||||
start = datetime(
|
||||
year=business_day.year,
|
||||
month=business_day.month,
|
||||
day=business_day.day,
|
||||
hour=0,
|
||||
minute=0,
|
||||
second=0,
|
||||
tzinfo=tz
|
||||
).astimezone(UTC)
|
||||
start = datetime.combine(business_day, datetime.min.time(), tzinfo=tz).astimezone(UTC)
|
||||
end = start + timedelta(days=1)
|
||||
return self.fetch_range(start, end)
|
||||
|
||||
|
||||
@ -1,12 +1,13 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, date, timezone
|
||||
from typing import List, Tuple, Dict, Any, Optional
|
||||
from typing import List, Tuple, Dict, Any
|
||||
import os
|
||||
import requests
|
||||
from EnergyPriceScraper import EnergyPriceScraperBase
|
||||
|
||||
from logging_utils import HasLogger
|
||||
from utils.time_helpers import UTC, WARSAW_TZ
|
||||
from utils.logging import HasLogger
|
||||
from utils.time import UTC, WARSAW_TZ
|
||||
|
||||
|
||||
class PstrykScraper(EnergyPriceScraperBase, HasLogger):
|
||||
@ -72,15 +73,7 @@ class PstrykScraper(EnergyPriceScraperBase, HasLogger):
|
||||
return out
|
||||
|
||||
def fetch_day(self, business_day: date, tz: timezone) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
|
||||
start = datetime(
|
||||
year=business_day.year,
|
||||
month=business_day.month,
|
||||
day=business_day.day,
|
||||
hour=0,
|
||||
minute=0,
|
||||
second=0,
|
||||
tzinfo=tz
|
||||
).astimezone(UTC)
|
||||
start = datetime.combine(business_day, datetime.min.time(), tzinfo=tz).astimezone(UTC)
|
||||
|
||||
end = start + timedelta(days=1)
|
||||
return self.fetch_range(start, end)
|
||||
|
||||
72
app.py
72
app.py
@ -1,83 +1,39 @@
|
||||
import os
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from zoneinfo import ZoneInfo
|
||||
from apscheduler.schedulers.blocking import BlockingScheduler
|
||||
from apscheduler.triggers.cron import CronTrigger
|
||||
|
||||
import EnergyPriceScraperFactory
|
||||
|
||||
from logging_setup import setup_logging
|
||||
from utils.time_helpers import WARSAW_TZ
|
||||
from utils.logging import setup_logging
|
||||
from utils.time import WARSAW_TZ
|
||||
|
||||
from app_impl import hourly_tick
|
||||
from logging_utils import function_logger
|
||||
from utils.logging import function_logger
|
||||
|
||||
setup_logging(logging.DEBUG)
|
||||
sched = BlockingScheduler(timezone=WARSAW_TZ)
|
||||
|
||||
def run_pstryk():
|
||||
def run(name: str):
|
||||
log = function_logger()
|
||||
log.info("Running PSTRYK")
|
||||
log.info(f"Running {name}")
|
||||
try:
|
||||
conn = EnergyPriceScraperFactory.setup_db()
|
||||
pstryk_scraper = EnergyPriceScraperFactory.create("Pstryk", conn=conn)
|
||||
pstryk_scraper = EnergyPriceScraperFactory.create(name, conn=conn)
|
||||
# 11:30 and hourly safety check
|
||||
hourly_tick(pstryk_scraper)
|
||||
except Exception as e:
|
||||
log.error(f"PSTRYK throw an exception: {e}")
|
||||
log.error(f"{name} throw an exception: {e}")
|
||||
|
||||
|
||||
def run_pstryk():
|
||||
run("Pstryk")
|
||||
|
||||
|
||||
def run_PSE_RCE():
|
||||
log = function_logger()
|
||||
log.info("Running PSE-RCE")
|
||||
try:
|
||||
conn = EnergyPriceScraperFactory.setup_db()
|
||||
scraper = EnergyPriceScraperFactory.create("PSE_RCE", conn=conn)
|
||||
hourly_tick(scraper)
|
||||
except Exception as e:
|
||||
log.error(f"PSE-RCE throw an exception: {e}")
|
||||
run("PSE_RCE")
|
||||
|
||||
|
||||
# Daily triggers at exact local times (DST-safe)
|
||||
sched.add_job(run_pstryk, 'cron', hour='*', minute='30')
|
||||
sched.add_job(run_PSE_RCE, 'cron', hour='*', minute='30')
|
||||
sched.add_job(run_pstryk, 'cron', hour='*', minute='0')
|
||||
sched.add_job(run_PSE_RCE, 'cron', hour='*', minute='0')
|
||||
|
||||
sched.start()
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# setup_logging(logging.DEBUG)
|
||||
#
|
||||
# conn = EnergyPriceScraperFactory.setup_db()
|
||||
# # scraper = EnergyPriceScraperFactory.create("InstratRDN_CSV", conn=conn, path=path)
|
||||
# # scraper = EnergyPriceScraperFactory.create("PSE_RCE", conn=conn)
|
||||
# scraper = EnergyPriceScraperFactory.create("Pstryk", conn=conn)
|
||||
|
||||
# distribution_cost = DistributionCostFactory.create("TauronG13")
|
||||
# energy_price = DynamicPricesProvider.DynamicPricesProvider(PROVIDER="instrat", conn=conn, KIND="fixing I", SIDE="buy")
|
||||
#
|
||||
# start = datetime(2025, 6, 27, 0, 0) # lokalny czas PL (jeśli naive – funkcja sama doda WAW)
|
||||
#
|
||||
# rows = []
|
||||
# for h in range(24*3):
|
||||
# ts = start + timedelta(hours=h)
|
||||
# dist = distribution_cost.rate(ts)
|
||||
# price = energy_price.rate(ts)
|
||||
# pstryk_fee = 0.08
|
||||
#
|
||||
# rows.append({
|
||||
# "ts": ts,
|
||||
# "Dystrybucja (net)": dist,
|
||||
# "Energia (net)": price,
|
||||
# "Pstryk (net)" : pstryk_fee,
|
||||
# "Podatki": (distribution_cost.rate(ts) + price + pstryk_fee) * 1.23 - (distribution_cost.rate(ts) + price + pstryk_fee),
|
||||
# })
|
||||
#
|
||||
# fig, ax = plot_stacked_with_negatives(
|
||||
# rows,
|
||||
# title="Składowe ceny 1 kWh",
|
||||
# order=["Dystrybucja (net)", "Energia (net)", "Podatki"] # opcjonalnie
|
||||
# )
|
||||
#
|
||||
# plt.show()
|
||||
|
||||
|
||||
20
app_impl.py
20
app_impl.py
@ -1,23 +1,11 @@
|
||||
from __future__ import annotations
|
||||
from datetime import datetime, date, time, timedelta, timezone
|
||||
from datetime import datetime, date, timedelta
|
||||
from zoneinfo import ZoneInfo
|
||||
from typing import Sequence, Tuple, Optional
|
||||
from typing import Optional
|
||||
|
||||
from logging_utils import function_logger
|
||||
from utils.time_helpers import WARSAW_TZ, UTC
|
||||
from utils.logging import function_logger
|
||||
from utils.time import WARSAW_TZ, end_of_business_day_utc
|
||||
|
||||
def local_midnight(d: date, tz: ZoneInfo) -> datetime:
|
||||
"""Return local midnight (tz-aware) for a given calendar date."""
|
||||
return datetime(d.year, d.month, d.day, 0, 0, tzinfo=tz)
|
||||
|
||||
def end_of_business_day_utc(business_day: date, tz: ZoneInfo) -> datetime:
|
||||
"""
|
||||
End of the business day [start, end) expressed in UTC.
|
||||
For day D this is local midnight of D+1 converted to UTC.
|
||||
Correct across 23h/25h DST days.
|
||||
"""
|
||||
end_local = local_midnight(business_day + timedelta(days=1), tz=tz)
|
||||
return end_local.astimezone(UTC)
|
||||
|
||||
def is_day_fully_ingested(scraper, business_day: date, tz: ZoneInfo) -> bool:
|
||||
"""
|
||||
|
||||
@ -1,53 +0,0 @@
|
||||
import logging
|
||||
import logging.config
|
||||
import logging, os, sys, platform
|
||||
try:
|
||||
# Only available on Linux with systemd
|
||||
from systemd.journal import JournalHandler # type: ignore
|
||||
HAS_JOURNAL = True
|
||||
except Exception:
|
||||
HAS_JOURNAL = False
|
||||
|
||||
|
||||
class MaxLevelFilter(logging.Filter):
|
||||
"""Allow records up to a certain level (inclusive)."""
|
||||
def __init__(self, level): super().__init__(); self.level = level
|
||||
def filter(self, record): return record.levelno <= self.level
|
||||
|
||||
|
||||
class EnsureContext(logging.Filter):
|
||||
"""Always provide 'context' so the formatter never KeyErrors."""
|
||||
def filter(self, record: logging.LogRecord) -> bool:
|
||||
if not hasattr(record, "context"):
|
||||
record.context = ""
|
||||
return True
|
||||
|
||||
def setup_logging(level: int = logging.INFO, syslog_id: str = "energy-scrapers") -> None:
|
||||
"""Use journald if available; otherwise split stdout/stderr (Windows-friendly)."""
|
||||
root = logging.getLogger()
|
||||
root.handlers.clear()
|
||||
root.setLevel(level)
|
||||
|
||||
fmt = logging.Formatter("%(asctime)s %(levelname)s [%(name)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
|
||||
|
||||
if HAS_JOURNAL and platform.system() == "Linux":
|
||||
# Native journald handler (preserves levels/metadata)
|
||||
h = JournalHandler(SYSLOG_IDENTIFIER=syslog_id)
|
||||
h.setFormatter(logging.Formatter("%(message)s")) # journald adds timestamp/level
|
||||
root.addHandler(h)
|
||||
else:
|
||||
# Portable fallback: INFO and below -> stdout, WARNING+ -> stderr
|
||||
h_out = logging.StreamHandler(sys.stdout)
|
||||
h_out.setLevel(logging.DEBUG)
|
||||
h_out.addFilter(MaxLevelFilter(logging.INFO))
|
||||
h_out.setFormatter(fmt)
|
||||
|
||||
h_err = logging.StreamHandler(sys.stderr)
|
||||
h_err.setLevel(logging.WARNING)
|
||||
h_err.setFormatter(fmt)
|
||||
|
||||
root.addHandler(h_out)
|
||||
root.addHandler(h_err)
|
||||
|
||||
# Optional: make sure Python doesn’t buffer output (useful on Windows/services)
|
||||
os.environ.setdefault("PYTHONUNBUFFERED", "1")
|
||||
@ -1,6 +1,57 @@
|
||||
import logging
|
||||
from contextlib import contextmanager
|
||||
import inspect
|
||||
import logging.config
|
||||
import logging, os, sys, platform
|
||||
try:
|
||||
# Only available on Linux with systemd
|
||||
from systemd.journal import JournalHandler # type: ignore
|
||||
HAS_JOURNAL = True
|
||||
except Exception:
|
||||
HAS_JOURNAL = False
|
||||
|
||||
|
||||
class MaxLevelFilter(logging.Filter):
|
||||
"""Allow records up to a certain level (inclusive)."""
|
||||
def __init__(self, level): super().__init__(); self.level = level
|
||||
def filter(self, record): return record.levelno <= self.level
|
||||
|
||||
|
||||
class EnsureContext(logging.Filter):
|
||||
"""Always provide 'context' so the formatter never KeyErrors."""
|
||||
def filter(self, record: logging.LogRecord) -> bool:
|
||||
if not hasattr(record, "context"):
|
||||
record.context = ""
|
||||
return True
|
||||
|
||||
def setup_logging(level: int = logging.INFO, syslog_id: str = "energy-scrapers") -> None:
|
||||
"""Use journald if available; otherwise split stdout/stderr (Windows-friendly)."""
|
||||
root = logging.getLogger()
|
||||
root.handlers.clear()
|
||||
root.setLevel(level)
|
||||
|
||||
fmt = logging.Formatter("%(asctime)s %(levelname)s [%(name)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
|
||||
|
||||
if HAS_JOURNAL and platform.system() == "Linux":
|
||||
# Native journald handler (preserves levels/metadata)
|
||||
h = JournalHandler(SYSLOG_IDENTIFIER=syslog_id)
|
||||
h.setFormatter(logging.Formatter("%(message)s")) # journald adds timestamp/level
|
||||
root.addHandler(h)
|
||||
else:
|
||||
# Portable fallback: INFO and below -> stdout, WARNING+ -> stderr
|
||||
h_out = logging.StreamHandler(sys.stdout)
|
||||
h_out.setLevel(logging.DEBUG)
|
||||
h_out.addFilter(MaxLevelFilter(logging.INFO))
|
||||
h_out.setFormatter(fmt)
|
||||
|
||||
h_err = logging.StreamHandler(sys.stderr)
|
||||
h_err.setLevel(logging.WARNING)
|
||||
h_err.setFormatter(fmt)
|
||||
|
||||
root.addHandler(h_out)
|
||||
root.addHandler(h_err)
|
||||
|
||||
# Optional: make sure Python doesn’t buffer output (useful on Windows/services)
|
||||
os.environ.setdefault("PYTHONUNBUFFERED", "1")
|
||||
|
||||
def _fmt_ctx(ctx: dict) -> str:
|
||||
# Build a single bracketed context block for the formatter: "[k=v a=b] "
|
||||
28
utils/time.py
Normal file
28
utils/time.py
Normal file
@ -0,0 +1,28 @@
|
||||
from __future__ import annotations
|
||||
from datetime import time, date, datetime, timedelta
|
||||
from typing import Tuple
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
WARSAW_TZ = ZoneInfo("Europe/Warsaw")
|
||||
UTC = ZoneInfo("UTC")
|
||||
TimeRange = Tuple[time, time]
|
||||
|
||||
def in_range_local(t_local: time, rng: TimeRange) -> bool:
|
||||
"""[start, end) in local time, supports ranges crossing midnight."""
|
||||
start, end = rng
|
||||
if start <= end:
|
||||
return start <= t_local < end
|
||||
return (t_local >= start) or (t_local < end)
|
||||
|
||||
def local_midnight(d: date, tz: ZoneInfo) -> datetime:
|
||||
"""Return local midnight (tz-aware) for a given calendar date."""
|
||||
return datetime(d.year, d.month, d.day, 0, 0, tzinfo=tz)
|
||||
|
||||
def end_of_business_day_utc(business_day: date, tz: ZoneInfo) -> datetime:
|
||||
"""
|
||||
End of the business day [start, end) expressed in UTC.
|
||||
For day D this is local midnight of D+1 converted to UTC.
|
||||
Correct across 23h/25h DST days.
|
||||
"""
|
||||
end_local = local_midnight(business_day + timedelta(days=1), tz=tz)
|
||||
return end_local.astimezone(UTC)
|
||||
@ -1,15 +0,0 @@
|
||||
from __future__ import annotations
|
||||
from datetime import time
|
||||
from typing import Tuple
|
||||
import zoneinfo
|
||||
|
||||
WARSAW_TZ = zoneinfo.ZoneInfo("Europe/Warsaw")
|
||||
UTC = zoneinfo.ZoneInfo("UTC")
|
||||
TimeRange = Tuple[time, time]
|
||||
|
||||
def in_range_local(t_local: time, rng: TimeRange) -> bool:
|
||||
"""[start, end) in local time, supports ranges crossing midnight."""
|
||||
start, end = rng
|
||||
if start <= end:
|
||||
return start <= t_local < end
|
||||
return (t_local >= start) or (t_local < end)
|
||||
Loading…
Reference in New Issue
Block a user