Add data scrapers
This commit is contained in:
parent
2aec71ce49
commit
01f4f569f4
@@ -1,4 +1,3 @@
# DistributionProvider/TauronG13sProvider.py
from __future__ import annotations
from dataclasses import dataclass
from datetime import time, datetime
@@ -1,4 +1,3 @@
# EnergyPriceProvider/RDNProvider.py
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, timedelta
@@ -1,4 +1,3 @@
# EnergyPriceProvider/TauronG13Provider.py
from __future__ import annotations
from datetime import time, datetime
from EnergyPrice import EnergyPriceBase
145  EnergyPriceScraper.py  Normal file
@@ -0,0 +1,145 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, date, timedelta
from typing import Iterable, List, Tuple, Dict, Any, Optional
from zoneinfo import ZoneInfo
import json
import pandas as pd  # used by rows_from_series below
import psycopg
import time as _time

WAW = ZoneInfo("Europe/Warsaw")
IntervalRow = Tuple[datetime, datetime, float, str, str, str, str, str, str]  # see _rows_to_upsert

UPSERT_SQL = """
INSERT INTO pricing.energy_prices
    (ts_start, ts_end, price_pln_net, provider, kind, side, buyer, seller, source_meta)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)
ON CONFLICT (ts_start, ts_end, provider, kind, side)
DO UPDATE SET
    price_pln_net = EXCLUDED.price_pln_net,
    buyer = EXCLUDED.buyer,
    seller = EXCLUDED.seller,
    source_meta = COALESCE(pricing.energy_prices.source_meta, '{}'::jsonb)
                  || COALESCE(EXCLUDED.source_meta, '{}'::jsonb),
    inserted_at = now();
"""
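
# The upsert above assumes roughly the following target table (a sketch only —
# the DDL is not part of this commit and the column types are assumptions;
# the ON CONFLICT clause requires the UNIQUE constraint shown here):
#
#   CREATE TABLE IF NOT EXISTS pricing.energy_prices (
#       ts_start      timestamptz NOT NULL,
#       ts_end        timestamptz NOT NULL,
#       price_pln_net numeric     NOT NULL,
#       provider      text        NOT NULL,
#       kind          text        NOT NULL,
#       side          text        NOT NULL,
#       buyer         text,
#       seller        text,
#       source_meta   jsonb,
#       inserted_at   timestamptz NOT NULL DEFAULT now(),
#       UNIQUE (ts_start, ts_end, provider, kind, side)
#   );
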
@dataclass
class EnergyPriceScraperBase:
    """Base class for market scrapers (collect → normalize → UPSERT)."""
    dsn: Optional[str] = None
    conn: Optional[psycopg.Connection] = None
    tz: ZoneInfo = WAW
    period: timedelta = timedelta(hours=1)

    # identifiers – OVERRIDE in a subclass:
    PROVIDER: str = ""    # e.g. 'PSE' / 'instrat' / 'PSTRYK'
    KIND: str = ""        # e.g. 'rce' / 'fixing_I' / 'market_price'
    SIDE: str = "buy"     # 'buy' | 'sell'
    BUYER: str = "end_user"
    SELLER: str = "market_index"

    # throttling/retry
    max_retries: int = 3
    backoff_sec: float = 1.0

    # ---------- public API ----------
    def ingest_day(self, business_day: date) -> int:
        """Fetches and stores one full local day [00:00, 24:00). Returns the number of upserted rows."""
        points = self.fetch_day(business_day)  # [(start, end, price, meta_dict), ...]
        rows = self._rows_to_upsert(points)
        return self._upsert(rows)

    def ingest_range(self, start_day: date, end_day: date) -> int:
        """Backfill: [start_day, end_day], local day by local day."""
        total = 0
        d = start_day
        while d <= end_day:
            total += self.ingest_day(d)
            d = d + timedelta(days=1)
        return total

    def fetch_day(self, business_day: date) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        """Implement in a subclass. Return a list of points priced in NET PLN/kWh."""
        raise NotImplementedError

    # ---------- helpers ----------
    def _ensure_conn(self) -> psycopg.Connection:
        if self.conn:
            return self.conn
        if not self.dsn:
            raise RuntimeError("Provide dsn= or conn= for EnergyPriceScraperBase")
        self.conn = psycopg.connect(self.dsn)
        return self.conn

    def _rows_to_upsert(self, points: Iterable[Tuple[datetime, datetime, float, Dict[str, Any]]]) -> List[IntervalRow]:
        rows: List[IntervalRow] = []
        for ts_start, ts_end, price_pln_kwh_net, meta in points:
            # sanity: make both timestamps tz-aware in self.tz
            if ts_start.tzinfo is None:
                ts_start = ts_start.replace(tzinfo=self.tz)
            else:
                ts_start = ts_start.astimezone(self.tz)
            if ts_end.tzinfo is None:
                ts_end = ts_end.replace(tzinfo=self.tz)
            else:
                ts_end = ts_end.astimezone(self.tz)
            rows.append((
                ts_start, ts_end, float(price_pln_kwh_net),
                self.PROVIDER, self.KIND, self.SIDE, self.BUYER, self.SELLER,
                json.dumps(meta or {})
            ))
        return rows

    def _upsert(self, rows: List[IntervalRow]) -> int:
        if not rows:
            return 0
        for attempt in range(1, self.max_retries + 1):
            try:
                with self._ensure_conn().cursor() as cur:
                    cur.executemany(UPSERT_SQL, rows)
                self._ensure_conn().commit()
                return len(rows)
            except Exception:
                if attempt >= self.max_retries:
                    raise
                # roll back the aborted transaction before retrying
                try:
                    self._ensure_conn().rollback()
                except Exception:
                    pass
                _time.sleep(self.backoff_sec * attempt)
        return 0

    # small helper for building a local day's bounds
    def _day_range(self, d: date) -> Tuple[datetime, datetime]:
        start = datetime(d.year, d.month, d.day, 0, 0, tzinfo=self.tz)
        return start, start + timedelta(days=1)

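# A minimal sketch of the fetch_day() contract (hypothetical subclass, flat
# 0.50 PLN/kWh for every hour — illustration only, not part of this module):
#
#   class FlatPriceScraper(EnergyPriceScraperBase):
#       PROVIDER = "demo"
#       KIND = "flat"
#
#       def fetch_day(self, business_day: date):
#           start, end = self._day_range(business_day)
#           points, ts = [], start
#           while ts < end:
#               points.append((ts, ts + self.period, 0.50, {"source": "demo"}))
#               ts += self.period
#           return points
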
def rows_from_series(series_pln_per_kwh: pd.Series, provider: str, kind: str,
                     side: str = "buy", buyer: str = "end_user", seller: str = "market_index",
                     period: timedelta = timedelta(hours=1), meta: dict | None = None) -> List[IntervalRow]:
    """
    Converts an hourly series (index = period start, tz-aware) into rows for the upsert.
    Emits full 9-tuples matching UPSERT_SQL; side/buyer/seller default to the buy side.
    """
    if series_pln_per_kwh.empty:
        return []

    s = series_pln_per_kwh.copy()
    idx = s.index
    if getattr(idx, "tz", None) is None:
        s.index = idx.tz_localize(WAW, nonexistent="shift_forward", ambiguous="infer")
    else:
        s = s.tz_convert(WAW)

    meta_json = json.dumps(meta or {})
    rows: List[IntervalRow] = []
    for ts_start, price in s.dropna().items():
        ts_end = ts_start + period
        rows.append((ts_start, ts_end, float(price), provider, kind, side, buyer, seller, meta_json))
    return rows
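
# Example wiring of rows_from_series + upsert_energy_prices (sketch; assumes an
# open 'conn' and a DataFrame like the one built by InstratRDN_CSVScraper):
#
#   rows = rows_from_series(df["fixing_i_pln_kwh"], provider="instrat",
#                           kind="fixing_I",
#                           meta={"unit": "PLN/kWh", "taxes_included": False})
#   upsert_energy_prices(conn, rows)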

def upsert_energy_prices(conn, rows):
    """
    rows: an iterable of tuples matching UPSERT_SQL:
        (ts_start, ts_end, price_pln_net, provider, kind, side, buyer, seller, source_meta_json)
    """
    if not rows:
        return
    with conn.cursor() as cur:
        cur.executemany(UPSERT_SQL, rows)
    conn.commit()


if __name__ == "__main__":
    # setup_db lives in the factory module; imported here to avoid a circular import
    from EnergyPriceScraperFactory import setup_db
    conn = setup_db()
48  EnergyPriceScraperFactory.py  Normal file
@@ -0,0 +1,48 @@
# EnergyPriceScraperFactory.py
from __future__ import annotations
import importlib
from typing import Any, cast, Type
from EnergyPriceScraper import EnergyPriceScraperBase

import os
import psycopg

DB_HOST = os.getenv("PGHOST", "192.168.30.10")
DB_PORT = int(os.getenv("PGPORT", "5432"))
DB_NAME = os.getenv("PGDATABASE", "postgres")
DB_USER = os.getenv("PGUSER", "energy_ingest")
DB_PASS = os.getenv("PGPASSWORD", "2f1rLCa03mQrbmlCbD6envk")


def setup_db():
    # psycopg 3
    conn = psycopg.connect(
        host=DB_HOST, port=DB_PORT, dbname=DB_NAME, user=DB_USER, password=DB_PASS
    )
    return conn


def create(name: str, /, **kwargs: Any) -> EnergyPriceScraperBase:
    """
    Convention:
        module: Scraper.<Name>Scraper
        class:  <Name>Scraper
    Example: create("TauronG13", rates={...})
    """
    safe = "".join(ch for ch in name if ch.isalnum() or ch == "_")
    module_name = f"Scraper.{safe}Scraper"
    class_name = f"{safe}Scraper"

    try:
        mod = importlib.import_module(module_name)
    except ModuleNotFoundError as e:
        raise ValueError(f"Scraper module not found: {module_name}") from e

    try:
        cls = getattr(mod, class_name)
    except AttributeError as e:
        raise ValueError(f"Scraper class not found: {class_name} in {module_name}") from e

    if not issubclass(cls, EnergyPriceScraperBase):
        raise TypeError(f"{class_name} must inherit EnergyPriceScraperBase")

    ScraperCls = cast(Type[EnergyPriceScraperBase], cls)
    return ScraperCls(**kwargs)  # type: ignore[arg-type]
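
A quick usage sketch for the factory (credentials come from setup_db's env defaults; PSE_RCEScraper is added later in this commit):

    import EnergyPriceScraperFactory
    from datetime import date

    scraper = EnergyPriceScraperFactory.create(
        "PSE_RCE", conn=EnergyPriceScraperFactory.setup_db()
    )
    n = scraper.ingest_day(date(2025, 6, 27))
    print(f"upserted {n} rows")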
@@ -1,72 +0,0 @@
import os
from datetime import timedelta
import json
import psycopg
import pandas as pd
import zoneinfo

TZ = zoneinfo.ZoneInfo("Europe/Warsaw")

DB_HOST = os.getenv("PGHOST", "192.168.30.10")
DB_PORT = int(os.getenv("PGPORT", "5432"))
DB_NAME = os.getenv("PGDATABASE", "postgres")
DB_USER = os.getenv("PGUSER", "energy_ingest")
DB_PASS = os.getenv("PGPASSWORD", "2f1rLCa03mQrbmlCbD6envk")

UPSERT_SQL = """
INSERT INTO pricing.energy_prices
    (ts_start, ts_end, price_pln_net, provider, kind, side, buyer, seller, source_meta)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (ts_start, ts_end, provider, kind, side)
DO UPDATE SET
    price_pln_net = EXCLUDED.price_pln_net,
    buyer = EXCLUDED.buyer,
    seller = EXCLUDED.seller,
    source_meta = COALESCE(pricing.energy_prices.source_meta, '{}'::jsonb)
                  || COALESCE(EXCLUDED.source_meta, '{}'::jsonb),
    inserted_at = now();
"""

def setup_db():
    # psycopg 3
    conn = psycopg.connect(
        host=DB_HOST, port=DB_PORT, dbname=DB_NAME, user=DB_USER, password=DB_PASS
    )
    return conn

def rows_from_series(series_pln_per_kwh: pd.Series, provider: str, kind: str,
                     period: timedelta = timedelta(hours=1), meta: dict | None = None):
    """
    Converts an hourly series (index = period start, tz-aware) into a list of rows for the upsert.
    """
    if series_pln_per_kwh.empty:
        return []

    s = series_pln_per_kwh.copy()
    idx = s.index
    if getattr(idx, "tz", None) is None:
        idx = idx.tz_localize(TZ, nonexistent="shift_forward", ambiguous="infer")
        s.index = idx
    else:
        s = s.tz_convert(TZ)

    meta_json = json.dumps(meta or {})
    rows = []
    for ts_start, price in s.dropna().items():
        ts_end = ts_start + period
        rows.append((ts_start, ts_end, float(price), provider, kind, meta_json))
    return rows

def upsert_energy_prices(conn, rows):
    """
    rows: an iterable of tuples:
        (ts_start, ts_end, price_pln_net, provider, kind, source_meta_json)
    """
    if not rows:
        return
    with conn.cursor() as cur:
        cur.executemany(UPSERT_SQL, rows)
    conn.commit()

if __name__ == "__main__":
    conn = setup_db()
78  Scraper/InstratRDN_CSVScraper.py  Normal file
@@ -0,0 +1,78 @@
from __future__ import annotations
from datetime import datetime, timedelta, date
from typing import List, Tuple, Dict, Any
import pandas as pd
from EnergyPriceScraper import EnergyPriceScraperBase
from utils.time_helpers import WARSAW_TZ


class InstratRDN_CSVScraper(EnergyPriceScraperBase):
    """
    Example RDN (day-ahead market) scraper reading from CSV/JSON (public HTTP).
    Expects a CSV with columns 'date' and 'fixing_i_price' (PLN/MWh, or already PLN/kWh).
    """
    PROVIDER = "instrat"
    KIND = "fixing_I"
    SIDE = "buy"
    BUYER = "end_user"
    SELLER = "market_index"

    def __init__(self, path: str, **kwargs):
        super().__init__(**kwargs)
        self.out = self.load_instrat_csv(path)

    def load_instrat_csv(self, path: str) -> pd.DataFrame:
        """
        Loads an Instrat CSV in the format:
            date,fixing_i_price,fixing_i_volume,fixing_ii_price,fixing_ii_volume
            01.01.2016 00:00,108.27,2565.10,108.55,89.10
        """
        # 1) Load and parse the Polish date format (pandas 2.x removed the
        #    date_parser argument, so the column is parsed explicitly after reading)
        df = pd.read_csv(path)
        df["date"] = pd.to_datetime(df["date"], format="%d.%m.%Y %H:%M").dt.tz_localize(
            WARSAW_TZ, nonexistent="shift_forward", ambiguous="infer"
        )

        # PLN/MWh -> PLN/kWh
        fi_pln_kwh = (df["fixing_i_price"] / 1000.0).round(4)
        fii_pln_kwh = (df["fixing_ii_price"] / 1000.0).round(4)

        out = pd.DataFrame({
            "fixing_i_pln_kwh": fi_pln_kwh.values,
            "fixing_ii_pln_kwh": fii_pln_kwh.values,
            "fixing_i_volume": pd.to_numeric(df.get("fixing_i_volume"), errors="coerce").values,
            "fixing_ii_volume": pd.to_numeric(df.get("fixing_ii_volume"), errors="coerce").values,
        }, index=df["date"]).sort_index()

        # sanity check — make sure the conversion produced any prices
        if out[["fixing_i_pln_kwh", "fixing_ii_pln_kwh"]].notna().sum().sum() == 0:
            raise RuntimeError("No prices after conversion — check the separator/number format in the CSV.")
        return out

    def fetch_day(self, business_day: date) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        if not hasattr(self, "out"):
            raise RuntimeError("No data: load the CSV and build self.out first")

        # column selection by KIND (default: Fixing I)
        kind = getattr(self, "KIND", "fixing_I")
        kind_norm = str(kind).replace(" ", "_").lower()
        if "fixing_ii" in kind_norm:
            col = "fixing_ii_pln_kwh"
            fixing_tag = "II"
        else:
            col = "fixing_i_pln_kwh"
            fixing_tag = "I"

        # local-day bounds
        day_start = datetime(business_day.year, business_day.month, business_day.day, 0, 0, tzinfo=self.tz)
        day_end = day_start + timedelta(days=1)

        # filter and emit points
        df_day = self.out.loc[(self.out.index >= day_start) & (self.out.index < day_end)]
        if col not in df_day.columns:
            raise KeyError(f"Column '{col}' does not exist in self.out")

        points: List[Tuple[datetime, datetime, float, Dict[str, Any]]] = []
        for ts, price in df_day[col].dropna().items():
            ts_end = ts + getattr(self, "period", timedelta(hours=1))
            points.append((ts.to_pydatetime(), ts_end.to_pydatetime(), float(price),
                           {"source": "instrat_csv", "unit": "PLN/kWh", "fixing": fixing_tag, "taxes_included": False}))
        return points
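
A usage sketch for the CSV scraper, mirroring main.py (the CSV path is the one main.py uses; the connection comes from the factory's env defaults):

    from datetime import date
    from EnergyPriceScraperFactory import create, setup_db

    scraper = create("InstratRDN_CSV", conn=setup_db(),
                     path="electricity_prices_day_ahead_hourly_all.csv")
    scraper.ingest_day(date(2025, 6, 27))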
46  Scraper/PSE_RCEScraper.py  Normal file
@@ -0,0 +1,46 @@
from __future__ import annotations
from datetime import datetime, timedelta, date
from typing import List, Tuple, Dict, Any
import requests
from EnergyPriceScraper import EnergyPriceScraperBase, WAW


class PSE_RCEScraper(EnergyPriceScraperBase):
    """
    PSE RCE (PLN) – hourly values for a given day.
    Returns NET PLN/kWh (RCE is published in PLN/MWh, so we divide by 1000).
    """
    PROVIDER = "PSE"
    KIND = "rce"
    SIDE = "sell"
    BUYER = "retailer"  # the seller that settles the prosumer's energy
    SELLER = "prosumer"

    api_url: str = "https://api.raporty.pse.pl/api/rce-pln"
    session: requests.Session

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.session = requests.Session()
        self.session.headers.update({"accept": "application/json"})

    def fetch_day(self, business_day: date) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        # RCE v2: filter by business_date, select rce_pln,dtime,period
        params = {
            "$select": "rce_pln,dtime,period",
            "$filter": f"business_date eq '{business_day:%Y-%m-%d}'",
        }
        r = self.session.get(self.api_url, params=params, timeout=30)
        r.raise_for_status()
        data = r.json().get("value", [])

        out: List[Tuple[datetime, datetime, float, Dict[str, Any]]] = []
        for item in data:
            # dtime is ISO; period (in minutes) is typically 60
            ts = datetime.fromisoformat(item["dtime"]).astimezone(WAW)
            per_min = int(item.get("period", 60))
            ts_end = ts + timedelta(minutes=per_min)
            price_pln_mwh = float(item["rce_pln"])
            price_pln_kwh = price_pln_mwh / 1000.0  # NET PLN/kWh
            out.append((ts, ts_end, price_pln_kwh, {"source": "PSE_RCE_v2"}))
        return out
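
The scraper can also be used directly, without the factory — a sketch with a placeholder DSN:

    from datetime import date
    from Scraper.PSE_RCEScraper import PSE_RCEScraper

    rce = PSE_RCEScraper(dsn="postgresql://user:pass@host:5432/postgres")  # placeholder DSN
    points = rce.fetch_day(date(2025, 6, 27))  # raw (start, end, PLN/kWh, meta) tuples
    print(points[:3])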
49  Scraper/PstrykScraper.py  Normal file
@@ -0,0 +1,49 @@
from __future__ import annotations
from datetime import datetime, timedelta, date
from typing import List, Tuple, Dict, Any
import os
import requests
from EnergyPriceScraper import EnergyPriceScraperBase, WAW


class PstrykScraper(EnergyPriceScraperBase):
    """
    Template: prices published by the supplier (Pstryk).
    Assumes a Bearer token in ENV PSTRYK_TOKEN and the endpoint in ENV PSTRYK_API_BASE, e.g.:
        PSTRYK_API_BASE=https://api.pstryk.example.com
    Endpoint (example): GET /prices?date=YYYY-MM-DD
        -> [{"ts":"2025-08-27T00:00:00+02:00","net_pln_kwh":0.44}, ...]
    """
    PROVIDER = "PSTRYK"
    KIND = "market_price"
    SIDE = "buy"
    BUYER = "end_user"
    SELLER = "PSTRYK"

    api_base: str
    token: str

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.api_base = os.getenv("PSTRYK_API_BASE", "").rstrip("/")
        self.token = os.getenv("PSTRYK_TOKEN", "")
        if not self.api_base or not self.token:
            raise RuntimeError("Set PSTRYK_API_BASE and PSTRYK_TOKEN in the environment.")
        self.session = requests.Session()
        self.session.headers.update({
            "accept": "application/json",
            "authorization": f"Bearer {self.token}",
            "user-agent": "energy-scraper/1.0",
        })

    def fetch_day(self, business_day: date) -> List[Tuple[datetime, datetime, float, Dict[str, Any]]]:
        url = f"{self.api_base}/prices"
        r = self.session.get(url, params={"date": f"{business_day:%Y-%m-%d}"}, timeout=30)
        r.raise_for_status()
        data = r.json()

        out: List[Tuple[datetime, datetime, float, Dict[str, Any]]] = []
        for item in data:
            ts = datetime.fromisoformat(item["ts"]).astimezone(WAW)
            p = float(item["net_pln_kwh"])
            out.append((ts, ts + self.period, p, {"source": "pstryk_api"}))
        return out
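
A backfill sketch for this template (the endpoint shape in the docstring is an example, so the values below are placeholders):

    import os
    from datetime import date
    from EnergyPriceScraperFactory import create, setup_db

    os.environ.setdefault("PSTRYK_API_BASE", "https://api.pstryk.example.com")  # placeholder
    os.environ.setdefault("PSTRYK_TOKEN", "changeme")                           # placeholder

    scraper = create("Pstryk", conn=setup_db())
    scraper.ingest_range(date(2025, 8, 1), date(2025, 8, 31))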
1  Scraper/__init__.py  Normal file
@@ -0,0 +1 @@
# (left intentionally empty; factory imports modules dynamically)
54  main.py
@@ -1,56 +1,22 @@
# pip install pandas python-dateutil
import pandas as pd

from datetime import datetime, timedelta, date
import zoneinfo

TZ = zoneinfo.ZoneInfo("Europe/Warsaw")

import DistributionCostFactory
from EnergyPriceProvider import DynamicPricesProvider
from plot_cost_breakdown import plot_stacked_with_negatives
from matplotlib import pyplot as plt

def load_instrat_csv(path: str) -> pd.DataFrame:
    """
    Loads an Instrat CSV in the format:
        date,fixing_i_price,fixing_i_volume,fixing_ii_price,fixing_ii_volume
        01.01.2016 00:00,108.27,2565.10,108.55,89.10
    """
    # 1) Load and parse the Polish date format (pandas 2.x removed date_parser)
    df = pd.read_csv(path)
    df["date"] = pd.to_datetime(df["date"], format="%d.%m.%Y %H:%M").dt.tz_localize(
        TZ, nonexistent="shift_forward", ambiguous="infer"
    )

    fi_pln_kwh = (df["fixing_i_price"] / 1000.0).round(4)
    fii_pln_kwh = (df["fixing_ii_price"] / 1000.0).round(4)

    out = pd.DataFrame({
        "fixing_i_pln_kwh": fi_pln_kwh.values,
        "fixing_ii_pln_kwh": fii_pln_kwh.values,
        "fixing_i_volume": pd.to_numeric(df.get("fixing_i_volume"), errors="coerce").values,
        "fixing_ii_volume": pd.to_numeric(df.get("fixing_ii_volume"), errors="coerce").values,
    }, index=df["date"]).sort_index()

    # sanity check — make sure the conversion produced any prices
    if out[["fixing_i_pln_kwh", "fixing_ii_pln_kwh"]].notna().sum().sum() == 0:
        raise RuntimeError("No prices after conversion — check the separator/number format in the CSV.")

    return out

import EnergyPriceScraper as es  # EnergyScrapper.py was removed in this commit; its helpers now live here
import EnergyPriceScraperFactory

if __name__ == "__main__":
    path = "electricity_prices_day_ahead_hourly_all.csv"

    scraper = EnergyPriceScraperFactory.create("InstratRDN_CSV", conn=EnergyPriceScraperFactory.setup_db(), path=path)

    day = scraper.fetch_day(date(2025, 6, 27))
    print(day)

    # conn = es.setup_db()
    # df = load_instrat_csv(path)

    # net prices
    # df[["fixing_i_pln_kwh","fixing_ii_pln_kwh"]].to_csv(
    #     "tge_fixings_pln_kwh.csv", index_label="timestamp", float_format="%.2f"
    # )
    # print("OK: saved tge_fixings_pln_kwh_pl.csv and tge_fixings_pln_kwh.csv")

    conn = EnergyPriceScraperFactory.setup_db()

    # s = df["fixing_i_pln_kwh"]
    # rows1 = es.rows_from_series(s,
    #                 provider="instrat",
@@ -58,8 +24,8 @@ if __name__ == "__main__":
    #                 meta={"type": "RDN", "unit": "PLN/kWh", "source": "csv_export", "taxes_included": False}
    #                 )
    # es.upsert_energy_prices(conn, rows1)
    #
    # s = df["fixing_ii_pln_kwh"]
    # rows1 = es.rows_from_series(s,
    #                 provider="instrat",
    #                 kind="fixing II",