# ranczo-energy-price-scrapers/Scraper/PSE_RCEScraper.py
#
# Repository viewer metadata: 100 lines, 3.6 KiB, Python.
#
# NOTE: the repository viewer flagged this file as containing ambiguous
# Unicode characters, i.e. characters that might be confused with other
# characters. If this is intentional, the warning can be safely ignored.

from __future__ import annotations
from datetime import datetime, timedelta, date, timezone
from typing import List, Tuple, Dict, Any
import requests
from EnergyPriceScraper import EnergyPriceScraperBase
from utils.time_helpers import UTC, WARSAW_TZ
from logging_utils import HasLogger
class PSE_RCEScraper(EnergyPriceScraperBase, HasLogger):
    """Scraper for PSE RCE prices (PLN) of a single business day.

    Records are fetched from the PSE reports API and returned as NET prices
    in PLN/kWh: the ``rce_pln`` value is treated as PLN/MWh and divided by
    1000. Each record describes a 15-minute slot (see ``PSE_date_range``).
    """

    api_url: str = "https://api.raporty.pse.pl/api/rce-pln"
    session: requests.Session

    def __init__(self, **kwargs):
        """Set provider metadata, the logger and the HTTP session.

        All keyword arguments are forwarded unchanged to ``super().__init__``.
        """
        super().__init__(**kwargs)

        self.PROVIDER = "PSE"
        self.KIND = "rce"
        self.SIDE = "sell"
        # NOTE(review): "reteiler" looks like a misspelling of "retailer". It
        # is a runtime value that may already be persisted or matched
        # elsewhere, so it is deliberately left untouched here - confirm
        # before renaming.
        self.BUYER = "reteiler"
        self.SELLER = "prosumer"

        self.init_logger()

        # One shared session for all requests; ask the API for JSON.
        self.session = requests.Session()
        self.session.headers.update({"accept": "application/json"})

        self.log.info("Initializing PSE RCE Done")

    @staticmethod
    def _require(condition: bool, message: str) -> None:
        """Raise ``AssertionError(message)`` unless ``condition`` is true.

        The checks used to be plain ``assert`` statements, which are removed
        when Python runs with ``-O``. Raising explicitly keeps the validation
        active in every mode while preserving the exception type.
        """
        if not condition:
            raise AssertionError(message)

    def fetch_range(self, start_date: datetime, end_date: datetime) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """Fetch every RCE record of the business day that starts at ``start_date``.

        The business day is the calendar date of ``start_date`` converted to
        ``WARSAW_TZ``; ``end_date`` is only validated and logged.

        Args:
            start_date: Timezone-aware start of the window, with ``tzinfo == UTC``.
            end_date: Timezone-aware end of the window, with ``tzinfo == UTC``,
                exactly one day after ``start_date``.

        Returns:
            One ``(ts_start, ts_end, price_pln_kwh, meta)`` tuple per record
            returned by the API, in API order.

        Raises:
            AssertionError: If the window is empty/reversed, not in UTC, or
                not exactly one day long.
            requests.HTTPError: If the API answers with an error status.
        """
        self._require(start_date < end_date, "start_date must be earlier than end_date")
        self._require(start_date.tzinfo is not None, "start_date must be timezone-aware")
        self._require(end_date.tzinfo is not None, "end_date must be timezone-aware")
        self._require(start_date.tzinfo == UTC, "start_date must be expressed in UTC")
        self._require(end_date.tzinfo == UTC, "end_date must be expressed in UTC")
        # For now there is no way to import more than one day per call.
        self._require(
            end_date - start_date == timedelta(days=1),
            "only a range of exactly one day is supported",
        )

        self.log.info(f"Fetching range: [{start_date}, {end_date}) UTC / [{start_date.astimezone(WARSAW_TZ)}, {end_date.astimezone(WARSAW_TZ)}) Europe/Warsaw")

        business_day = start_date.astimezone(WARSAW_TZ).date()
        self.log.debug(f"business_day: {business_day}")

        # RCE v2: filter by business_date and select only the fields that
        # parse_pse_rce_record() reads.
        params = {
            "$select": "rce_pln,publication_ts_utc,dtime_utc,business_date",
            "$filter": f"business_date eq '{business_day:%Y-%m-%d}'",
        }
        r = self.session.get(self.api_url, params=params, timeout=30)
        r.raise_for_status()

        # NOTE(review): only the first response page is read; confirm that a
        # whole business day always fits into a single page of this API.
        # "or []" also covers an explicit null under "value".
        data = r.json().get("value") or []
        self.log.debug(f"Fetched data len: {len(data)} points")

        return [self.parse_pse_rce_record(item) for item in data]

    def fetch_day(self, business_day: date, tz: timezone) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """Fetch the records of ``business_day``, whose midnight is taken in ``tz``.

        Args:
            business_day: Calendar date to fetch.
            tz: Timezone in which midnight of ``business_day`` is interpreted.

        Returns:
            Whatever ``fetch_range`` returns for the resulting one-day window.
        """
        # NOTE(review): tz is attached through the datetime constructor, which
        # presumably expects a zoneinfo/datetime-style tzinfo (pytz zones need
        # localize()) - confirm against the type of WARSAW_TZ.
        # NOTE(review): fetch_range() re-derives the business day in
        # WARSAW_TZ, so a tz other than Warsaw may select a different date.
        local_midnight = datetime(
            year=business_day.year,
            month=business_day.month,
            day=business_day.day,
            tzinfo=tz,
        )
        start = local_midnight.astimezone(UTC)
        # Always a fixed 24 h window (even when the local day is shorter or
        # longer), because fetch_range() insists on exactly one day.
        end = start + timedelta(days=1)
        return self.fetch_range(start, end)

    @staticmethod
    def PSE_date_range(dtime: datetime) -> Tuple[datetime, datetime]:
        """Return ``(start, end)`` of the 15-minute slot that ends at ``dtime``."""
        ts_end = dtime
        ts_start = dtime - timedelta(minutes=15)
        return ts_start, ts_end

    @staticmethod
    def parse_pse_rce_record(rec: dict) -> Tuple[datetime, datetime, float, Dict[str, Any]]:
        """Convert one API record into ``(ts_start, ts_end, price_pln_kwh, meta)``.

        Args:
            rec: Record with the keys ``dtime_utc`` (``YYYY-MM-DD HH:MM:SS``),
                ``rce_pln``, ``business_date`` and ``publication_ts_utc``.

        Returns:
            Slot start and end as UTC-aware datetimes, the price divided by
            1000 (PLN/MWh -> PLN/kWh) and a metadata dictionary.

        Raises:
            KeyError: If a required key is missing from ``rec``.
            ValueError: If ``dtime_utc`` or ``rce_pln`` cannot be parsed.
        """
        # The timestamp marks the END of the slot; its beginning is derived
        # by PSE_date_range().
        dtime_utc = datetime.strptime(rec["dtime_utc"], "%Y-%m-%d %H:%M:%S").replace(tzinfo=UTC)
        ts_start, ts_end = PSE_RCEScraper.PSE_date_range(dtime=dtime_utc)

        price_pln_mwh = float(rec["rce_pln"])
        price_pln_kwh = price_pln_mwh / 1000.0

        meta = {
            "business_date": rec["business_date"],
            "source": "PSE_RCE",
            "publication_ts_utc": rec["publication_ts_utc"],
            "unit": "PLN/kWh",
            "taxes_included": False,
        }
        return ts_start, ts_end, price_pln_kwh, meta