Add data scrapers
This commit is contained in:
parent
2aec71ce49
commit
01f4f569f4
@ -1,4 +1,3 @@
|
|||||||
# DistributionProvider/TauronG13sProvider.py
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import time, datetime
|
from datetime import time, datetime
|
||||||
|
|||||||
@ -1,4 +1,3 @@
|
|||||||
# EnergyPriceProvider/RDNProvider.py
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
|||||||
@ -1,4 +1,3 @@
|
|||||||
# EnergyPriceProvider/TauronG13Provider.py
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from datetime import time, datetime
|
from datetime import time, datetime
|
||||||
from EnergyPrice import EnergyPriceBase
|
from EnergyPrice import EnergyPriceBase
|
||||||
|
|||||||
145
EnergyPriceScraper.py
Normal file
145
EnergyPriceScraper.py
Normal file
@ -0,0 +1,145 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime, date, timedelta
|
||||||
|
from typing import Iterable, List, Tuple, Dict, Any, Optional
|
||||||
|
from zoneinfo import ZoneInfo
|
||||||
|
import json
|
||||||
|
import psycopg
|
||||||
|
import time as _time
|
||||||
|
|
||||||
|
WAW = ZoneInfo("Europe/Warsaw")

# One row for UPSERT_SQL, in column order (built by _rows_to_upsert):
# (ts_start, ts_end, price_pln_net, provider, kind, side, buyer, seller, source_meta_json)
IntervalRow = Tuple[datetime, datetime, float, str, str, str, str, str, str]

UPSERT_SQL = """
INSERT INTO pricing.energy_prices
(ts_start, ts_end, price_pln_net, provider, kind, side, buyer, seller, source_meta)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)
ON CONFLICT (ts_start, ts_end, provider, kind, side)
DO UPDATE SET
price_pln_net = EXCLUDED.price_pln_net,
buyer = EXCLUDED.buyer,
seller = EXCLUDED.seller,
source_meta = COALESCE(pricing.energy_prices.source_meta, '{}'::jsonb)
|| COALESCE(EXCLUDED.source_meta, '{}'::jsonb),
inserted_at = now();
"""


@dataclass
class EnergyPriceScraperBase:
    """Base class for market-price scrapers (fetch -> normalise -> UPSERT).

    Subclasses implement ``fetch_day`` and override the identifier attributes
    (``PROVIDER``, ``KIND``, ``SIDE``, ``BUYER``, ``SELLER``) at class level.
    Either ``dsn`` or an already-open ``conn`` must be supplied before rows
    can be written.
    """

    dsn: Optional[str] = None
    conn: Optional["psycopg.Connection"] = None
    tz: ZoneInfo = WAW
    period: timedelta = timedelta(hours=1)

    # identifiers - OVERRIDE in the subclass:
    PROVIDER: str = ""            # e.g. 'PSE' / 'instrat' / 'PSTRYK'
    KIND: str = ""                # e.g. 'rce' / 'fixing_I' / 'market_price'
    SIDE: str = "buy"             # 'buy'|'sell'
    BUYER: str = "end_user"
    SELLER: str = "market_index"

    # throttling/retry
    max_retries: int = 3
    backoff_sec: float = 1.0

    # Unannotated on purpose, so the dataclass does not treat it as a field.
    _IDENTITY_ATTRS = ("PROVIDER", "KIND", "SIDE", "BUYER", "SELLER")

    def __post_init__(self) -> None:
        """Let class-level identifier overrides in subclasses take effect.

        The identifiers are dataclass fields, so the generated ``__init__``
        always assigns them on the instance, using the *base class* defaults
        when the caller passes nothing. That instance attribute would shadow a
        plain ``PROVIDER = "..."`` override in a subclass. Any identifier still
        equal to the base default is therefore re-read from the concrete class.
        An explicitly passed value that happens to equal the base default is
        indistinguishable from "not passed" and resolves to the class value too.
        """
        base_defaults = vars(EnergyPriceScraperBase)
        for name in self._IDENTITY_ATTRS:
            if getattr(self, name) == base_defaults[name]:
                setattr(self, name, getattr(type(self), name))

    # ---------- public API ----------
    def ingest_day(self, business_day: date) -> int:
        """Fetch one day via ``fetch_day`` and upsert it; return the number of rows written."""
        points = self.fetch_day(business_day)  # [(start, end, price, meta_dict), ...]
        rows = self._rows_to_upsert(points)
        return self._upsert(rows)

    def ingest_range(self, start_day: date, end_day: date) -> int:
        """Backfill every day in the inclusive range [start_day, end_day]; return total rows written."""
        total = 0
        current = start_day
        while current <= end_day:
            total += self.ingest_day(current)
            current += timedelta(days=1)
        return total

    def fetch_day(self, business_day: date) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """Implement in a subclass: return ``(ts_start, ts_end, net PLN/kWh, meta)`` points."""
        raise NotImplementedError

    # ---------- helpers ----------
    def _ensure_conn(self) -> "psycopg.Connection":
        """Return the open connection, connecting lazily from ``dsn`` when needed.

        Raises:
            RuntimeError: neither ``conn`` nor ``dsn`` was provided.
        """
        if self.conn is not None:
            return self.conn
        if not self.dsn:
            raise RuntimeError("Podaj dsn= lub conn= dla EnergyPriceScraperBase")
        self.conn = psycopg.connect(self.dsn)
        return self.conn

    def _localize(self, ts: datetime) -> datetime:
        """Attach ``self.tz`` to a naive timestamp, or convert an aware one to it."""
        if ts.tzinfo is None:
            return ts.replace(tzinfo=self.tz)
        return ts.astimezone(self.tz)

    def _rows_to_upsert(self, points: Iterable[Tuple[datetime, datetime, float, Dict[str, Any]]]) -> List[IntervalRow]:
        """Turn fetched points into parameter tuples matching UPSERT_SQL's column order."""
        rows: List[IntervalRow] = []
        for ts_start, ts_end, price_pln_kwh_net, meta in points:
            rows.append((
                self._localize(ts_start), self._localize(ts_end), float(price_pln_kwh_net),
                self.PROVIDER, self.KIND, self.SIDE, self.BUYER, self.SELLER,
                json.dumps(meta or {}),
            ))
        return rows

    def _recover_connection(self) -> None:
        """Bring the connection back to a usable state after a failed attempt."""
        conn = self.conn
        if conn is None:
            return
        try:
            # A failed statement leaves the transaction aborted; without a
            # rollback every retry on the same connection would fail as well.
            conn.rollback()
        except Exception:
            # The connection itself is unusable. Drop it only when a dsn
            # exists, so the next attempt can reconnect.
            if self.dsn:
                self.conn = None

    def _upsert(self, rows: List[IntervalRow]) -> int:
        """Write ``rows`` with UPSERT_SQL, retrying with linear backoff.

        Returns the number of rows submitted (0 for an empty list). The last
        exception is re-raised once all attempts are exhausted.
        """
        if not rows:
            return 0
        # Always try at least once, even if max_retries is misconfigured to <= 0.
        attempts = max(1, self.max_retries)
        for attempt in range(1, attempts + 1):
            try:
                conn = self._ensure_conn()
                with conn.cursor() as cur:
                    cur.executemany(UPSERT_SQL, rows)
                conn.commit()
                return len(rows)
            except Exception:
                self._recover_connection()
                if attempt >= attempts:
                    raise
                _time.sleep(self.backoff_sec * attempt)
        return 0  # not reached: the loop either returns or raises

    # small helper for building the hours of a day
    def _day_range(self, d: date) -> Tuple[datetime, datetime]:
        """Return (local midnight of ``d``, local midnight of the next day) in ``self.tz``."""
        start = datetime(d.year, d.month, d.day, 0, 0, tzinfo=self.tz)
        return start, start + timedelta(days=1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def rows_from_series(series_pln_per_kwh: "pd.Series", provider: str, kind: str,
                     period: timedelta = timedelta(hours=1), meta: Optional[dict] = None,
                     side: str = "buy", buyer: str = "end_user", seller: str = "market_index",
                     tz: Optional[ZoneInfo] = None):
    """Convert a price series into parameter tuples for UPSERT_SQL.

    Args:
        series_pln_per_kwh: prices indexed by period start. A naive index is
            localised to ``tz``; an aware one is converted to it.
        provider, kind: identifiers stored with every row.
        period: length of each interval; ``ts_end = ts_start + period``.
        meta: optional metadata, serialised to JSON once and shared by all rows.
        side, buyer, seller: remaining identifier columns required by
            UPSERT_SQL; the defaults match EnergyPriceScraperBase.
        tz: target timezone; defaults to the module-level ``WAW``.

    Returns:
        A list of 9-tuples ``(ts_start, ts_end, price, provider, kind, side,
        buyer, seller, meta_json)``. NaN prices are skipped; an empty series
        yields ``[]``.
    """
    if series_pln_per_kwh.empty:
        return []

    zone = tz if tz is not None else WAW
    s = series_pln_per_kwh.copy()
    if getattr(s.index, "tz", None) is None:
        s.index = s.index.tz_localize(zone, nonexistent="shift_forward", ambiguous="infer")
    else:
        s = s.tz_convert(zone)

    meta_json = json.dumps(meta or {})
    # UPSERT_SQL has nine placeholders, so every row carries all nine columns.
    return [
        (ts_start, ts_start + period, float(price), provider, kind, side, buyer, seller, meta_json)
        for ts_start, price in s.dropna().items()
    ]
|
||||||
|
|
||||||
|
def upsert_energy_prices(conn, rows):
    """Upsert ``rows`` into pricing.energy_prices using UPSERT_SQL and commit.

    Args:
        conn: an open DB connection exposing ``cursor()``, ``commit()`` and
            ``rollback()``.
        rows: iterable of 9-tuples in UPSERT_SQL column order:
            (ts_start, ts_end, price_pln_net, provider, kind, side, buyer,
            seller, source_meta_json). ``None`` or an empty iterable is a no-op.

    On any failure the transaction is rolled back and the error re-raised, so
    the caller's connection is not left in an aborted state.
    """
    # Materialise first: an empty generator is truthy, and the batch is needed
    # as a concrete sequence anyway.
    batch = list(rows) if rows is not None else []
    if not batch:
        return
    try:
        with conn.cursor() as cur:
            cur.executemany(UPSERT_SQL, batch)
        conn.commit()
    except Exception:
        conn.rollback()
        raise
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: open and close a DB connection.
    # setup_db is defined in EnergyPriceScraperFactory, not in this module, so
    # it is imported here; a top-level import would be circular because the
    # factory imports this module.
    from EnergyPriceScraperFactory import setup_db

    conn = setup_db()
    conn.close()
|
||||||
48
EnergyPriceScraperFactory.py
Normal file
48
EnergyPriceScraperFactory.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
# EnergyPriceScraperFactory.py
|
||||||
|
from __future__ import annotations
|
||||||
|
import importlib
|
||||||
|
from typing import Any, cast, Type
|
||||||
|
from EnergyPriceScraper import EnergyPriceScraperBase
|
||||||
|
|
||||||
|
import os
|
||||||
|
import psycopg
|
||||||
|
|
||||||
|
# Connection settings, read from the standard libpq environment variables.
DB_HOST = os.getenv("PGHOST", "192.168.30.10")
DB_PORT = int(os.getenv("PGPORT", "5432"))
DB_NAME = os.getenv("PGDATABASE", "postgres")
DB_USER = os.getenv("PGUSER", "energy_ingest")
# SECURITY: no fallback value here - a credential must never live in source
# control. Supply PGPASSWORD through the environment (or a ~/.pgpass entry).
# The password that used to be hard-coded on this line should be treated as
# leaked and rotated.
DB_PASS = os.getenv("PGPASSWORD")
|
||||||
|
|
||||||
|
def setup_db():
    """Open and return a new psycopg 3 connection built from the module-level DB_* settings."""
    return psycopg.connect(
        host=DB_HOST,
        port=DB_PORT,
        dbname=DB_NAME,
        user=DB_USER,
        password=DB_PASS,
    )
|
||||||
|
|
||||||
|
def create(name: str, /, **kwargs: Any) -> "EnergyPriceScraperBase":
    """Instantiate a scraper by name.

    Convention:
        module: Scraper.<Name>Scraper
        class:  <Name>Scraper
    Example: create("PSE_RCE", dsn="...")

    Args:
        name: scraper name; characters other than letters, digits and ``_``
            are dropped before the module/class names are built.
        **kwargs: forwarded unchanged to the scraper's constructor.

    Raises:
        ValueError: the name is empty after sanitising, the module cannot be
            imported (not found, or one of its own dependencies is missing),
            or the module does not define the expected class.
        TypeError: the attribute found is not a subclass of
            EnergyPriceScraperBase.
    """
    safe = "".join(ch for ch in name if ch.isalnum() or ch == "_")
    if not safe:
        raise ValueError(f"Invalid scraper name: {name!r}")
    module_name = f"Scraper.{safe}Scraper"
    class_name = f"{safe}Scraper"

    try:
        mod = importlib.import_module(module_name)
    except ModuleNotFoundError as e:
        missing = e.name or module_name
        # ModuleNotFoundError is also raised when the scraper module exists
        # but imports something that is not installed; report that case
        # separately instead of claiming the scraper itself is missing.
        if missing != module_name and not module_name.startswith(missing + "."):
            raise ValueError(
                f"Scraper module {module_name} could not be imported: missing dependency {missing!r}"
            ) from e
        raise ValueError(f"Scraper module not found: {module_name}") from e

    try:
        cls = getattr(mod, class_name)
    except AttributeError as e:
        raise ValueError(f"Scraper class not found: {class_name} in {module_name}") from e

    # isinstance guard first: issubclass() itself raises an unrelated
    # TypeError when handed a non-class attribute.
    if not (isinstance(cls, type) and issubclass(cls, EnergyPriceScraperBase)):
        raise TypeError(f"{class_name} must inherit EnergyPriceScraperBase")

    ProviderCls = cast(Type[EnergyPriceScraperBase], cls)
    return ProviderCls(**kwargs)  # type: ignore[arg-type]
|
||||||
@ -1,72 +0,0 @@
|
|||||||
import os
|
|
||||||
from datetime import timedelta
|
|
||||||
import json
|
|
||||||
import psycopg
|
|
||||||
import pandas as pd
|
|
||||||
import zoneinfo
|
|
||||||
|
|
||||||
TZ = zoneinfo.ZoneInfo("Europe/Warsaw")
|
|
||||||
|
|
||||||
DB_HOST = os.getenv("PGHOST", "192.168.30.10")
|
|
||||||
DB_PORT = int(os.getenv("PGPORT", "5432"))
|
|
||||||
DB_NAME = os.getenv("PGDATABASE", "postgres")
|
|
||||||
DB_USER = os.getenv("PGUSER", "energy_ingest")
|
|
||||||
DB_PASS = os.getenv("PGPASSWORD", "2f1rLCa03mQrbmlCbD6envk")
|
|
||||||
|
|
||||||
UPSERT_SQL = """
|
|
||||||
INSERT INTO pricing.energy_prices
|
|
||||||
(ts_start, ts_end, price_pln_net, provider, kind, side, buyer, seller, source_meta)
|
|
||||||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
|
|
||||||
ON CONFLICT (ts_start, ts_end, provider, kind, side)
|
|
||||||
DO UPDATE SET
|
|
||||||
price_pln_net = EXCLUDED.price_pln_net,
|
|
||||||
buyer = EXCLUDED.buyer,
|
|
||||||
seller = EXCLUDED.seller,
|
|
||||||
source_meta = COALESCE(pricing.energy_prices.source_meta, '{}'::jsonb)
|
|
||||||
|| COALESCE(EXCLUDED.source_meta, '{}'::jsonb),
|
|
||||||
inserted_at = now();
|
|
||||||
"""
|
|
||||||
|
|
||||||
def setup_db():
|
|
||||||
# psycopg 3
|
|
||||||
conn = psycopg.connect(
|
|
||||||
host=DB_HOST, port=DB_PORT, dbname=DB_NAME, user=DB_USER, password=DB_PASS
|
|
||||||
)
|
|
||||||
return conn
|
|
||||||
|
|
||||||
def rows_from_series(series_pln_per_kwh: pd.Series, provider: str, kind: str,
|
|
||||||
period: timedelta = timedelta(hours=1), meta: dict | None = None):
|
|
||||||
"""
|
|
||||||
Zamienia serię godzinową (index = start okresu, tz-aware) na listę wierszy dla upsertu.
|
|
||||||
"""
|
|
||||||
if series_pln_per_kwh.empty:
|
|
||||||
return []
|
|
||||||
|
|
||||||
s = series_pln_per_kwh.copy()
|
|
||||||
idx = s.index
|
|
||||||
if getattr(idx, "tz", None) is None:
|
|
||||||
idx = idx.tz_localize(TZ, nonexistent="shift_forward", ambiguous="infer")
|
|
||||||
s.index = idx
|
|
||||||
else:
|
|
||||||
s = s.tz_convert(TZ)
|
|
||||||
|
|
||||||
meta_json = json.dumps(meta or {})
|
|
||||||
rows = []
|
|
||||||
for ts_start, price in s.dropna().items():
|
|
||||||
ts_end = ts_start + period
|
|
||||||
rows.append((ts_start, ts_end, float(price), provider, kind, meta_json))
|
|
||||||
return rows
|
|
||||||
|
|
||||||
def upsert_energy_prices(conn, rows):
|
|
||||||
"""
|
|
||||||
rows: iterable krotek:
|
|
||||||
(ts_start, ts_end, price_pln_net, provider, kind, source_meta_json)
|
|
||||||
"""
|
|
||||||
if not rows:
|
|
||||||
return
|
|
||||||
with conn.cursor() as cur:
|
|
||||||
cur.executemany(UPSERT_SQL, rows)
|
|
||||||
conn.commit()
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
conn = setup_db()
|
|
||||||
78
Scraper/InstratRDN_CSVScraper.py
Normal file
78
Scraper/InstratRDN_CSVScraper.py
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
from datetime import datetime, timedelta, date
|
||||||
|
from typing import List, Tuple, Dict, Any
|
||||||
|
import pandas as pd
|
||||||
|
from EnergyPriceScraper import EnergyPriceScraperBase
|
||||||
|
from utils.time_helpers import WARSAW_TZ
|
||||||
|
|
||||||
|
class InstratRDN_CSVScraper(EnergyPriceScraperBase):
    """Day-ahead (RDN) price scraper backed by a local Instrat CSV export.

    The CSV is read once in the constructor; ``fetch_day`` then serves points
    from the in-memory frame. Prices in the file are divided by 1000
    (PLN/MWh -> PLN/kWh) and rounded to 4 decimals.
    """

    PROVIDER = "instrat"
    KIND = "fixing_I"
    SIDE = "buy"
    BUYER = "end_user"
    SELLER = "market_index"

    url: str

    def __init__(self, path: str, **kwargs):
        """Load the CSV at ``path``; remaining kwargs go to EnergyPriceScraperBase."""
        super().__init__(**kwargs)
        self.data = self.load_instrat_csv(path)

    def load_instrat_csv(self, path: str) -> pd.DataFrame:
        """Read an Instrat CSV, store the normalised frame in ``self.out`` and return it.

        Expected format:
            date,fixing_i_price,fixing_i_volume,fixing_ii_price,fixing_ii_volume
            01.01.2016 00:00,108.27,2565.10,108.55,89.10

        Raises:
            RuntimeError: no price survived the conversion (usually a wrong
                separator or number format in the CSV).
        """
        df = pd.read_csv(path)
        # Parse the timestamps explicitly instead of passing read_csv's
        # deprecated date_parser argument; per-row parsing keeps the original
        # dd.mm.YYYY HH:MM handling and timezone attachment.
        df["date"] = pd.to_datetime(
            df["date"].map(
                lambda x: datetime.strptime(x, '%d.%m.%Y %H:%M').replace(tzinfo=WARSAW_TZ)
            )
        )

        def volume(column: str):
            # Volume columns are optional: a missing one becomes an all-NaN
            # column instead of crashing on None.
            if column not in df.columns:
                return float("nan")
            return pd.to_numeric(df[column], errors="coerce").values

        fi_pln_kwh = (df["fixing_i_price"] / 1000.0).round(4)
        fii_pln_kwh = (df["fixing_ii_price"] / 1000.0).round(4)

        self.out = pd.DataFrame({
            "fixing_i_pln_kwh": fi_pln_kwh.values,
            "fixing_ii_pln_kwh": fii_pln_kwh.values,
            "fixing_i_volume": volume("fixing_i_volume"),
            "fixing_ii_volume": volume("fixing_ii_volume"),
        }, index=df["date"]).sort_index()

        # sanity check - the result must contain at least one price
        if self.out[["fixing_i_pln_kwh", "fixing_ii_pln_kwh"]].notna().sum().sum() == 0:
            raise RuntimeError("Brak cen po przeliczeniu — sprawdź separator/format liczb w CSV.")

        # Returned so that __init__ stores the frame in self.data rather than None.
        return self.out

    def fetch_day(self, business_day: date) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """Return the points of one local day from the loaded CSV.

        The column is chosen from ``KIND``: anything containing "fixing_ii"
        (case-insensitive, spaces treated as underscores) selects Fixing II,
        everything else Fixing I. Rows with a missing price are skipped.
        """
        if not hasattr(self, "out"):
            raise RuntimeError("Brak danych: najpierw wczytaj CSV i zbuduj self.out")

        # column selection by KIND (Fixing I by default)
        kind_norm = str(getattr(self, "KIND", "fixing_I")).replace(" ", "_").lower()
        if "fixing_ii" in kind_norm:
            col, fixing_tag = "fixing_ii_pln_kwh", "II"
        else:
            col, fixing_tag = "fixing_i_pln_kwh", "I"

        # local-day window [00:00, next 00:00), shared with the base class helper
        day_start, day_end = self._day_range(business_day)

        df_day = self.out.loc[(self.out.index >= day_start) & (self.out.index < day_end)]
        if col not in df_day.columns:
            raise KeyError(f"Kolumna '{col}' nie istnieje w self.out")

        step = getattr(self, "period", timedelta(hours=1))
        points: List[Tuple[datetime, datetime, float, Dict[str, Any]]] = []
        for ts, price in df_day[col].dropna().items():
            ts_end = ts + step
            points.append((ts.to_pydatetime(), ts_end.to_pydatetime(), float(price),
                           {"source": "instrat_csv", "unit": "PLN/kWh", "fixing": fixing_tag, "taxes_included": False}))
        return points
|
||||||
|
|
||||||
46
Scraper/PSE_RCEScraper.py
Normal file
46
Scraper/PSE_RCEScraper.py
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
from datetime import datetime, timedelta, date
|
||||||
|
from typing import List, Tuple, Dict, Any
|
||||||
|
import requests
|
||||||
|
from EnergyPriceScraper import EnergyPriceScraperBase, WAW
|
||||||
|
|
||||||
|
|
||||||
|
class PSE_RCEScraper(EnergyPriceScraperBase):
    """PSE RCE (PLN) prices for a given business day.

    Emits net PLN/kWh: the ``rce_pln`` value from the API is divided by 1000,
    i.e. it is treated as PLN/MWh.
    """

    PROVIDER = "PSE"
    KIND = "rce"
    SIDE = "sell"
    # The retailer settling the prosumer.
    # NOTE(review): "reteiler" looks like a misspelling of "retailer"; it is
    # left unchanged because the value is persisted to the database.
    BUYER = "reteiler"
    SELLER = "prosumer"

    api_url: str = "https://api.raporty.pse.pl/api/rce-pln"
    session: requests.Session

    def __init__(self, **kwargs):
        """Create the HTTP session; kwargs are forwarded to EnergyPriceScraperBase."""
        super().__init__(**kwargs)
        self.session = requests.Session()
        self.session.headers.update({"accept": "application/json"})

    @staticmethod
    def _parse_local(raw: str) -> datetime:
        """Parse an ISO timestamp and express it in Europe/Warsaw.

        A value without a UTC offset is taken as Warsaw wall-clock time.
        Calling ``astimezone`` on a naive datetime would instead interpret it
        in the host's local timezone and shift prices on non-Warsaw machines.
        """
        ts = datetime.fromisoformat(raw)
        if ts.tzinfo is None:
            return ts.replace(tzinfo=WAW)
        return ts.astimezone(WAW)

    @staticmethod
    def _period_minutes(raw: Any, default: int = 60) -> int:
        """Return the interval length in minutes.

        Accepts a number of minutes. As a fallback it also accepts a textual
        range such as ``"00:00 - 00:15"``.
        NOTE(review): presumably the API can return ``period`` as such a
        range - confirm against the PSE API description. Anything
        unparseable falls back to ``default``.
        """
        try:
            return int(raw)
        except (TypeError, ValueError):
            pass
        try:
            start_txt, end_txt = (part.strip() for part in str(raw).split("-"))
            h0, m0 = (int(v) for v in start_txt.split(":"))
            h1, m1 = (int(v) for v in end_txt.split(":"))
        except ValueError:
            return default
        # Modulo one day so a range ending at 00:00 still gives a positive length.
        return ((h1 * 60 + m1) - (h0 * 60 + m0)) % (24 * 60) or default

    def fetch_day(self, business_day: date) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """Download one business day and return ``(ts_start, ts_end, PLN/kWh, meta)`` points.

        Raises:
            requests.HTTPError: the API answered with an error status.
        """
        # RCE v2: filter by business_date, select rce_pln,dtime,period
        params = {
            "$select": "rce_pln,dtime,period",
            "$filter": f"business_date eq '{business_day:%Y-%m-%d}'",
        }
        r = self.session.get(self.api_url, params=params, timeout=30)
        r.raise_for_status()
        data = r.json().get("value", [])

        out: List[Tuple[datetime, datetime, float, Dict[str, Any]]] = []
        for item in data:
            # NOTE(review): dtime is used as the interval START here - confirm
            # the API does not report the interval end in this field.
            ts = self._parse_local(item["dtime"])
            ts_end = ts + timedelta(minutes=self._period_minutes(item.get("period", 60)))
            price_pln_kwh = float(item["rce_pln"]) / 1000.0  # net PLN/kWh
            out.append((ts, ts_end, price_pln_kwh, {"source": "PSE_RCE_v2"}))
        return out
|
||||||
49
Scraper/PstrykScraper.py
Normal file
49
Scraper/PstrykScraper.py
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
from datetime import datetime, timedelta, date
|
||||||
|
from typing import List, Tuple, Dict, Any, Optional
|
||||||
|
import os
|
||||||
|
import requests
|
||||||
|
from EnergyPriceScraper import EnergyPriceScraperBase, WAW
|
||||||
|
|
||||||
|
class PstrykScraper(EnergyPriceScraperBase):
    """Template scraper for prices published by the retailer (Pstryk).

    Configuration comes from the environment: a bearer token in PSTRYK_TOKEN
    and the API base URL in PSTRYK_API_BASE, e.g.
        PSTRYK_API_BASE=https://api.pstryk.example.com
    Endpoint (example): GET /prices?date=YYYY-MM-DD
        -> [{"ts":"2025-08-27T00:00:00+02:00","net_pln_kwh":0.44}, ...]
    """

    PROVIDER = "PSTRYK"
    KIND = "market_price"
    SIDE = "buy"
    BUYER = "end_user"
    SELLER = "PSTRYK"

    api_base: str
    token: str

    def __init__(self, **kwargs):
        """Read the API settings and prepare an authenticated session.

        Raises:
            RuntimeError: PSTRYK_API_BASE or PSTRYK_TOKEN is not set.
        """
        super().__init__(**kwargs)
        self.api_base = os.getenv("PSTRYK_API_BASE", "").rstrip("/")
        self.token = os.getenv("PSTRYK_TOKEN", "")
        if not self.api_base or not self.token:
            raise RuntimeError("Ustaw PSTRYK_API_BASE i PSTRYK_TOKEN w środowisku.")
        self.session = requests.Session()
        self.session.headers.update({
            "accept": "application/json",
            "authorization": f"Bearer {self.token}",
            "user-agent": "energy-scraper/1.0",
        })

    def fetch_day(self, business_day: date) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """Download one day and return ``(ts_start, ts_end, net PLN/kWh, meta)`` points.

        Each interval lasts ``self.period``.

        Raises:
            requests.HTTPError: the API answered with an error status.
        """
        url = f"{self.api_base}/prices"
        r = self.session.get(url, params={"date": f"{business_day:%Y-%m-%d}"}, timeout=30)
        r.raise_for_status()
        data = r.json()

        out: List[Tuple[datetime, datetime, float, Dict[str, Any]]] = []
        for item in data:
            ts = datetime.fromisoformat(item["ts"])
            # A timestamp without an offset is taken as Warsaw wall-clock time;
            # astimezone() on a naive value would use the host's timezone.
            ts = ts.replace(tzinfo=WAW) if ts.tzinfo is None else ts.astimezone(WAW)
            p = float(item["net_pln_kwh"])
            out.append((ts, ts + self.period, p, {"source": "pstryk_api"}))
        return out
|
||||||
1
Scraper/__init__.py
Normal file
1
Scraper/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
# (left intentionally empty; factory imports modules dynamically)
|
||||||
54
main.py
54
main.py
@ -1,56 +1,22 @@
|
|||||||
# pip install pandas python-dateutil
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
import zoneinfo
|
|
||||||
|
|
||||||
TZ = zoneinfo.ZoneInfo("Europe/Warsaw")
|
|
||||||
|
|
||||||
import DistributionCostFactory
|
import DistributionCostFactory
|
||||||
from EnergyPriceProvider import DynamicPricesProvider
|
from EnergyPriceProvider import DynamicPricesProvider
|
||||||
from plot_cost_breakdown import plot_stacked_with_negatives
|
from plot_cost_breakdown import plot_stacked_with_negatives
|
||||||
from matplotlib import pyplot as plt
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
def load_instrat_csv(path: str) -> pd.DataFrame:
|
import EnergyPriceScraperFactory
|
||||||
"""
|
|
||||||
Wczytuje CSV Instrat z format
|
|
||||||
date,fixing_i_price,fixing_i_volume,fixing_ii_price,fixing_ii_volume
|
|
||||||
01.01.2016 00:00,108.27,2565.10,108.55,89.10
|
|
||||||
"""
|
|
||||||
# 1) Wczytanie z autodetekcją polskiego formatu
|
|
||||||
dateparse = lambda x: datetime.strptime(x, '%d.%m.%Y %H:%M').replace(tzinfo=TZ)
|
|
||||||
df = pd.read_csv(path, parse_dates=['date'], date_parser=dateparse)
|
|
||||||
|
|
||||||
fi_pln_kwh = (df["fixing_i_price"] / 1000.0).round(4)
|
|
||||||
fii_pln_kwh = (df["fixing_ii_price"] / 1000.0).round(4)
|
|
||||||
|
|
||||||
out = pd.DataFrame({
|
|
||||||
"fixing_i_pln_kwh": fi_pln_kwh.values,
|
|
||||||
"fixing_ii_pln_kwh": fii_pln_kwh.values,
|
|
||||||
"fixing_i_volume": pd.to_numeric(df.get("fixing_i_volume"), errors="coerce").values,
|
|
||||||
"fixing_ii_volume": pd.to_numeric(df.get("fixing_ii_volume"), errors="coerce").values,
|
|
||||||
}, index=df["date"]).sort_index()
|
|
||||||
|
|
||||||
# sanity check — nie wyszło pusto
|
|
||||||
if out[["fixing_i_pln_kwh", "fixing_ii_pln_kwh"]].notna().sum().sum() == 0:
|
|
||||||
raise RuntimeError("Brak cen po przeliczeniu — sprawdź separator/format liczb w CSV.")
|
|
||||||
|
|
||||||
return out
|
|
||||||
|
|
||||||
import EnergyScrapper as es
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
path = "electricity_prices_day_ahead_hourly_all.csv"
|
path = "electricity_prices_day_ahead_hourly_all.csv"
|
||||||
|
|
||||||
|
scraper = EnergyPriceScraperFactory.create("InstratRDN_CSV", conn=EnergyPriceScraperFactory.setup_db(), path=path)
|
||||||
|
|
||||||
|
day = scraper.fetch_day(datetime(2025, 6, 27, 0, 0))
|
||||||
|
print(day)
|
||||||
|
|
||||||
|
# conn = es.setup_db()
|
||||||
# df = load_instrat_csv(path)
|
# df = load_instrat_csv(path)
|
||||||
|
|
||||||
# netto
|
|
||||||
# df[["fixing_i_pln_kwh","fixing_ii_pln_kwh"]].to_csv(
|
|
||||||
# "tge_fixings_pln_kwh.csv", index_label="timestamp", float_format="%.2f"
|
|
||||||
# )
|
|
||||||
# print("OK: zapisano tge_fixings_pln_kwh_pl.csv oraz tge_fixings_pln_kwh.csv")
|
|
||||||
|
|
||||||
conn = es.setup_db()
|
|
||||||
|
|
||||||
# s = df["fixing_i_pln_kwh"]
|
# s = df["fixing_i_pln_kwh"]
|
||||||
# rows1 = es.rows_from_series(s,
|
# rows1 = es.rows_from_series(s,
|
||||||
# provider="instrat",
|
# provider="instrat",
|
||||||
@ -58,8 +24,8 @@ if __name__ == "__main__":
|
|||||||
# meta={"type":"RDN", "unit":"PLN/kWh","source":"csv_export", "taxes_included":False}
|
# meta={"type":"RDN", "unit":"PLN/kWh","source":"csv_export", "taxes_included":False}
|
||||||
# )
|
# )
|
||||||
# es.upsert_energy_prices(conn, rows1)
|
# es.upsert_energy_prices(conn, rows1)
|
||||||
#
|
# #
|
||||||
# s = df["fixing_ii_pln_kwh"]
|
# # s = df["fixing_ii_pln_kwh"]
|
||||||
# rows1 = es.rows_from_series(s,
|
# rows1 = es.rows_from_series(s,
|
||||||
# provider="instrat",
|
# provider="instrat",
|
||||||
# kind="fixing II",
|
# kind="fixing II",
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user