ranczo-energy-usage-scrapers/Scraper/PstrykScraper.py
Bartosz Wieczorek 166d64d51e init
2025-09-02 18:14:05 +02:00

122 lines
4.7 KiB
Python

from __future__ import annotations
from datetime import datetime, timedelta, date, timezone
from typing import List, Tuple, Dict, Any, Optional
import os
import requests
from EnergyPriceScraper import EnergyPriceScraperBase
from logging_utils import HasLogger
from utils.time_helpers import UTC, WARSAW_TZ
class PstrykScraper(EnergyPriceScraperBase, HasLogger):
    """
    Scraper for hourly energy prices published by the seller Pstryk.

    Configuration is read from the environment:
      * PSTRYK_API_BASE - base URL of the API (defaults to
        https://api.pstryk.pl/; a trailing slash is stripped).
      * PSTRYK_TOKEN    - API token, required. It is sent verbatim as the
        value of the ``Authorization`` header (no scheme prefix is added).

    Prices are fetched with
    ``GET {api_base}/integrations/pricing?resolution=hour&window_start=...&window_end=...``
    and the JSON response is expected to carry a ``frames`` list, e.g.:
      {'start': '2025-09-01T00:00:00+00:00', 'end': '2025-09-01T01:00:00+00:00',
       'price_net': 0.37, 'price_gross': 0.65, 'is_cheap': True, 'is_expensive': False}
    """

    api_base: str
    token: str

    def __init__(self, **kwargs):
        """
        Set provider metadata, read configuration and prepare the HTTP session.

        Raises:
            RuntimeError: if PSTRYK_API_BASE resolves to an empty value or
                PSTRYK_TOKEN is not set.
        """
        super().__init__(**kwargs)

        self.PROVIDER = "PSTRYK"
        self.KIND = "market_price"
        self.SIDE = "buy"
        self.BUYER = "end_user"
        self.SELLER = "PSTRYK"

        self.init_logger()

        self.api_base = os.getenv("PSTRYK_API_BASE", "https://api.pstryk.pl/").rstrip("/")
        # SECURITY: the token must come from the environment only. A literal
        # token used to be the fallback here; it is exposed in version control
        # history and should be revoked/rotated.
        self.token = os.getenv("PSTRYK_TOKEN", "")
        if not self.api_base or not self.token:
            raise RuntimeError("Ustaw PSTRYK_API_BASE i PSTRYK_TOKEN w środowisku.")

        self.session = requests.Session()
        self.session.headers.update({
            "accept": "application/json",
            "Authorization": f"{self.token}",
            "user-agent": "energy-scraper/1.0",
        })

        self.log.debug("Initializing PSTRYK Done")

    def fetch_range(self, start_date: datetime, end_date: datetime) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """
        Fetch hourly price frames for the half-open window [start_date, end_date).

        Args:
            start_date: timezone-aware window start; its tzinfo must equal UTC.
            end_date: timezone-aware window end; its tzinfo must equal UTC and
                it must be later than ``start_date``.

        Returns:
            One ``(ts_start_utc, ts_end_utc, price_pln_kwh_net, meta)`` tuple per
            frame accepted by ``parse_generic_price_frame``.

        Raises:
            ValueError: if the arguments are naive, not in UTC, or not ordered.
            requests.HTTPError: if the API answers with an error status.
        """
        # Explicit checks instead of `assert`, which is stripped under `python -O`.
        if start_date.tzinfo is None or end_date.tzinfo is None:
            raise ValueError("start_date and end_date must be timezone-aware")
        if start_date.tzinfo != UTC or end_date.tzinfo != UTC:
            raise ValueError("start_date and end_date must be expressed in UTC")
        if start_date >= end_date:
            raise ValueError(f"Empty or inverted range: start={start_date} end={end_date}")

        url = f"{self.api_base}/integrations/pricing"
        self.log.debug(f"Fetching url: {url}")
        self.log.info(f"Fetching range: [{start_date}, {end_date}) UTC / [{start_date.astimezone(WARSAW_TZ)}, {end_date.astimezone(WARSAW_TZ)}) Europe/Warsaw")

        params = {
            "resolution": "hour",
            "window_start": start_date.strftime("%Y-%m-%dT%H:%M:%SZ"),
            "window_end": end_date.strftime("%Y-%m-%dT%H:%M:%SZ"),
        }
        r = self.session.get(url, params=params, timeout=30)
        r.raise_for_status()
        data = r.json()

        frames = data['frames']
        self.log.debug(f"Fetched {len(frames)} data frames for [{start_date}, {end_date}) UTC")

        out: List[Tuple[datetime, datetime, float, Dict[str, Any]]] = []
        for frame in frames:
            row = self.parse_generic_price_frame(frame)
            if row is not None:
                out.append(row)
        return out

    def fetch_day(self, business_day: date, tz: timezone) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """
        Fetch the price frames covering one calendar day as seen in ``tz``.

        The window runs from local midnight of ``business_day`` to local
        midnight of the following day, both converted to UTC.

        Args:
            business_day: the calendar day to fetch.
            tz: tzinfo used to interpret the day's midnight boundaries.

        Returns:
            The result of ``fetch_range`` for that window.
        """
        next_day = business_day + timedelta(days=1)
        start_local = datetime(business_day.year, business_day.month, business_day.day, tzinfo=tz)
        # Take the next *local* midnight rather than start + 24h: on DST
        # transition days the local day lasts 23 or 25 hours.
        end_local = datetime(next_day.year, next_day.month, next_day.day, tzinfo=tz)
        return self.fetch_range(start_local.astimezone(UTC), end_local.astimezone(UTC))

    def parse_generic_price_frame(self, rec: dict) -> Optional[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """
        Convert one API price frame into a normalized tuple.

        Input (example):
          {'start':'2025-09-01T00:00:00+00:00','end':'2025-09-01T01:00:00+00:00',
           'price_net':0.37,'price_gross':0.65,'is_cheap':True,'is_expensive':False}

        Returns:
            ``(ts_start_utc, ts_end_utc, price_pln_kwh_net, meta)``, or ``None``
            when ``is_cheap`` or ``is_expensive`` is missing/None (the frame is
            logged and skipped).

        Raises:
            ValueError: if 'start'/'end' cannot be parsed, the range is empty or
                inverted, or 'price_net' is missing or not numeric.
        """
        if rec.get("is_cheap") is None or rec.get("is_expensive") is None:
            self.log.info(f"Ignoring non-valid price frame {rec}")
            return None

        try:
            # NOTE(review): a timestamp without an offset would be interpreted
            # by astimezone() in the machine's local zone - confirm the API
            # always sends offsets.
            ts_start = datetime.fromisoformat(rec["start"]).astimezone(UTC)
            ts_end = datetime.fromisoformat(rec["end"]).astimezone(UTC)
        except (KeyError, TypeError, ValueError, OverflowError) as e:
            raise ValueError(f"Bad iso timeformat in 'start'/'end': {e}") from e

        if ts_end <= ts_start:
            raise ValueError(f"Bad range: start={ts_start.isoformat()} end={ts_end.isoformat()}")

        try:
            price_pln_kwh_net = float(rec["price_net"])
        except (KeyError, TypeError, ValueError, OverflowError) as e:
            raise ValueError(f"Price net not available 'price_net': {e}") from e

        meta = {
            "unit": "PLN/kWh",
            "taxes_included": False,
            "is_cheap": bool(rec.get("is_cheap")),
            "is_expensive": bool(rec.get("is_expensive")),
        }
        return ts_start, ts_end, price_pln_kwh_net, meta