diff --git a/CHANGELOG.md b/CHANGELOG.md index e17e30577..f423f4609 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,134 @@ If upgrading from v2.x, see the [v3.0.0 release notes](https://github.com/flixOp Until here --> +## [5.1.0] - Upcoming + +**Summary**: This release introduces a new **aggregation abstraction layer** for time series clustering, making flixopt future-proof for alternative clustering methods beyond TSAM. The API is simplified to focus on timestep reduction (`cluster_reduce`), removing the constraint-based clustering approach. + +### ✨ Added + +**New Aggregation Module** (`flixopt.aggregation`): A backend-agnostic abstraction for time series aggregation: + +```python +from flixopt import aggregation + +# Available backends +aggregation.list_backends() # ['tsam', 'manual'] + +# Core data structures for any aggregation method +aggregation.AggregationResult # Universal result format +aggregation.ClusterStructure # For storage inter-cluster linking +aggregation.Aggregator # Protocol for custom backends +``` + +**Unified Aggregation API**: New `transform.aggregate()` method supporting multiple backends: + +```python +# TSAM clustering (default) - clusters 365 days into 8 typical days +fs_reduced = flow_system.transform.aggregate( + method='tsam', + n_representatives=8, + cluster_duration='1D', +) +fs_reduced.optimize(solver) + +# Expand back to full resolution +fs_expanded = fs_reduced.transform.expand_solution() +``` + +**TimeSeriesWeights Class**: PyPSA-inspired unified weighting system: + +```python +# Access weights on any FlowSystem +weights = flow_system.weights + +# temporal = timestep_duration × cluster_weight +weights.temporal # Applied to objective and constraints +weights.effective_objective # For objective function (with optional override) + +# Convenience method for weighted summation +total_energy = weights.sum_over_time(flow_rates) +``` + +**Manual Aggregation Backend**: Enables PyPSA-style workflow with external clustering tools: + +```python +from flixopt.aggregation import ManualBackend, create_manual_backend_from_labels + +# Use sklearn or any clustering algorithm +from sklearn.cluster import KMeans +# ... perform clustering, get labels ... + +# Create backend from cluster labels +backend = create_manual_backend_from_labels(labels, timesteps_per_cluster=24) + +# Or directly with mapping and weights +backend = ManualBackend( + timestep_mapping=my_mapping, # xr.DataArray: original → representative + representative_weights=my_weights, # xr.DataArray: weight per representative +) +``` + +**set_aggregation() Method** (placeholder): Future PyPSA-style manual aggregation: + +```python +# Coming soon - apply external clustering results directly +fs_agg = flow_system.transform.set_aggregation( + timestep_mapping=mapping, + weights=weights, +) +``` + +### 💥 Breaking Changes + +**Removed `transform.cluster()` method**: The constraint-based clustering approach has been removed. 
Use `cluster_reduce()` instead: + +```python +# Old (removed): +clustered_fs = flow_system.transform.cluster( + n_clusters=8, + cluster_duration='1D', +) + +# New (use cluster_reduce instead): +reduced_fs = flow_system.transform.cluster_reduce( + n_clusters=8, + cluster_duration='1D', +) +``` + +**Removed constraint-based clustering infrastructure**: +- `transform.cluster()` - removed (use `cluster_reduce()`) +- `transform.add_clustering()` - removed +- `FlowSystem._clustering_info` - removed (only `_cluster_info` for `cluster_reduce` remains) +- `FlowSystem._add_clustering_constraints()` - removed + +### ♻️ Changed + +**Terminology clarification** in aggregation module: +- "cluster" = a group of similar time chunks (e.g., similar days grouped together) +- "typical period" = a representative time chunk for a cluster (TSAM terminology) +- "cluster duration" = the length of each time chunk (e.g., 24h for daily clustering) + +Note: This is separate from the model's "period" dimension (years/months) and "scenario" dimension. + +**xarray-native data structures**: All aggregation interfaces use `xr.DataArray` and `xr.Dataset` for proper coordinate handling. + +### 🔥 Removed + +- `transform.cluster()` method (constraint-based clustering) +- `transform.add_clustering()` method +- `ClusteringModel` constraint generation (internal) +- `_clustering_info` storage on FlowSystem + +### 📝 Docs + +- Improved terminology: clarified distinction between clustering "typical periods" and model "period" dimension +- Added aggregation module documentation with backend examples + +--- + + ## [5.0.3] - 2025-12-18 **Summary**: Cleaner notebook outputs and improved `CONFIG.notebook()` preset. diff --git a/docs/notebooks/08a-aggregation.ipynb b/docs/notebooks/08a-aggregation.ipynb index d7b7576bb..6d0260539 100644 --- a/docs/notebooks/08a-aggregation.ipynb +++ b/docs/notebooks/08a-aggregation.ipynb @@ -12,7 +12,6 @@ "This notebook introduces:\n", "\n", "- **Resampling**: Reduce time resolution (e.g., hourly → 4-hourly)\n", - "- **Clustering**: Identify typical periods (e.g., 8 representative days)\n", "- **Two-stage optimization**: Size with reduced data, dispatch at full resolution\n", "- **Speed vs. 
accuracy trade-offs**: When to use each technique" ] @@ -35,8 +34,8 @@ "import timeit\n", "\n", "import pandas as pd\n", - "import plotly.express as px\n", - "import xarray as xr\n", + "import plotly.graph_objects as go\n", + "from plotly.subplots import make_subplots\n", "\n", "import flixopt as fx\n", "\n", @@ -48,9 +47,9 @@ "id": "3", "metadata": {}, "source": [ - "## Load Time Series Data\n", + "## Load the FlowSystem\n", "\n", - "We use real-world district heating data at 15-minute resolution (one month):" + "We use a pre-built district heating system with real-world time series data (one month at 15-min resolution):" ] }, { @@ -60,22 +59,22 @@ "metadata": {}, "outputs": [], "source": [ - "# Load time series data (15-min resolution)\n", - "data = pd.read_csv('data/Zeitreihen2020.csv', index_col=0, parse_dates=True).sort_index()\n", - "data = data['2020-01-01':'2020-01-31 23:45:00'] # One month\n", - "data.index.name = 'time' # Rename index for consistency\n", - "\n", - "timesteps = data.index\n", - "\n", - "# Extract profiles\n", - "electricity_demand = data['P_Netz/MW'].to_numpy()\n", - "heat_demand = data['Q_Netz/MW'].to_numpy()\n", - "electricity_price = data['Strompr.€/MWh'].to_numpy()\n", - "gas_price = data['Gaspr.€/MWh'].to_numpy()\n", - "\n", - "print(f'Timesteps: {len(timesteps)} ({len(timesteps) / 96:.0f} days at 15-min resolution)')\n", - "print(f'Heat demand: {heat_demand.min():.1f} - {heat_demand.max():.1f} MW')\n", - "print(f'Electricity price: {electricity_price.min():.1f} - {electricity_price.max():.1f} €/MWh')" + "from pathlib import Path\n", + "\n", + "# Generate example data if not present (for local development)\n", + "data_file = Path('data/district_heating_system.nc4')\n", + "if not data_file.exists():\n", + " from data.generate_example_systems import create_district_heating_system\n", + "\n", + " fs = create_district_heating_system()\n", + " fs.to_netcdf(data_file)\n", + "\n", + "# Load the district heating system (real data from Zeitreihen2020.csv)\n", + "flow_system = fx.FlowSystem.from_netcdf(data_file)\n", + "\n", + "timesteps = flow_system.timesteps\n", + "print(f'Loaded FlowSystem: {len(timesteps)} timesteps ({len(timesteps) / 96:.0f} days at 15-min resolution)')\n", + "print(f'Components: {list(flow_system.components.keys())}')" ] }, { @@ -85,142 +84,24 @@ "metadata": {}, "outputs": [], "source": [ - "# Visualize first week\n", - "profiles = xr.Dataset(\n", - " {\n", - " 'Heat Demand [MW]': xr.DataArray(heat_demand[:672], dims=['time'], coords={'time': timesteps[:672]}),\n", - " 'Electricity Price [€/MWh]': xr.DataArray(\n", - " electricity_price[:672], dims=['time'], coords={'time': timesteps[:672]}\n", - " ),\n", - " }\n", - ")\n", + "# Visualize first week of data\n", + "heat_demand = flow_system.components['HeatDemand'].inputs[0].fixed_relative_profile\n", + "electricity_price = flow_system.components['GridBuy'].outputs[0].effects_per_flow_hour['costs']\n", "\n", - "df = profiles.to_dataframe().reset_index().melt(id_vars='time', var_name='variable', value_name='value')\n", - "fig = px.line(df, x='time', y='value', facet_col='variable', height=300)\n", - "fig.update_yaxes(matches=None, showticklabels=True)\n", - "fig.for_each_annotation(lambda a: a.update(text=a.text.split('=')[-1]))\n", - "fig" - ] - }, - { - "cell_type": "markdown", - "id": "6", - "metadata": {}, - "source": [ - "## Build the Base FlowSystem\n", + "fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.1)\n", "\n", - "A typical district heating system with 
investment decisions:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [ - "def build_system(timesteps, heat_demand, electricity_demand, electricity_price, gas_price):\n", - " \"\"\"Build a district heating system with CHP, boiler, and storage (with investment options).\"\"\"\n", - " fs = fx.FlowSystem(timesteps)\n", - "\n", - " fs.add_elements(\n", - " # Buses\n", - " fx.Bus('Electricity'),\n", - " fx.Bus('Heat'),\n", - " fx.Bus('Gas'),\n", - " fx.Bus('Coal'),\n", - " # Effects\n", - " fx.Effect('costs', '€', 'Total Costs', is_standard=True, is_objective=True),\n", - " fx.Effect('CO2', 'kg', 'CO2 Emissions'),\n", - " # CHP with investment optimization\n", - " fx.linear_converters.CHP(\n", - " 'CHP',\n", - " thermal_efficiency=0.58,\n", - " electrical_efficiency=0.22,\n", - " electrical_flow=fx.Flow('P_el', bus='Electricity', size=200),\n", - " thermal_flow=fx.Flow(\n", - " 'Q_th',\n", - " bus='Heat',\n", - " size=fx.InvestParameters(\n", - " minimum_size=100,\n", - " maximum_size=300,\n", - " effects_of_investment_per_size={'costs': 10},\n", - " ),\n", - " ),\n", - " fuel_flow=fx.Flow('Q_fu', bus='Coal'),\n", - " ),\n", - " # Gas Boiler with investment optimization\n", - " fx.linear_converters.Boiler(\n", - " 'Boiler',\n", - " thermal_efficiency=0.85,\n", - " thermal_flow=fx.Flow(\n", - " 'Q_th',\n", - " bus='Heat',\n", - " size=fx.InvestParameters(\n", - " minimum_size=0,\n", - " maximum_size=150,\n", - " effects_of_investment_per_size={'costs': 5},\n", - " ),\n", - " ),\n", - " fuel_flow=fx.Flow('Q_fu', bus='Gas'),\n", - " ),\n", - " # Thermal Storage with investment optimization\n", - " fx.Storage(\n", - " 'Storage',\n", - " capacity_in_flow_hours=fx.InvestParameters(\n", - " minimum_size=0,\n", - " maximum_size=1000,\n", - " effects_of_investment_per_size={'costs': 0.5},\n", - " ),\n", - " initial_charge_state=0,\n", - " eta_charge=1,\n", - " eta_discharge=1,\n", - " relative_loss_per_hour=0.001,\n", - " charging=fx.Flow('Charge', size=137, bus='Heat'),\n", - " discharging=fx.Flow('Discharge', size=158, bus='Heat'),\n", - " ),\n", - " # Fuel sources\n", - " fx.Source(\n", - " 'GasGrid',\n", - " outputs=[fx.Flow('Q_Gas', bus='Gas', size=1000, effects_per_flow_hour={'costs': gas_price, 'CO2': 0.3})],\n", - " ),\n", - " fx.Source(\n", - " 'CoalSupply',\n", - " outputs=[fx.Flow('Q_Coal', bus='Coal', size=1000, effects_per_flow_hour={'costs': 4.6, 'CO2': 0.3})],\n", - " ),\n", - " # Electricity grid connection\n", - " fx.Source(\n", - " 'GridBuy',\n", - " outputs=[\n", - " fx.Flow(\n", - " 'P_el',\n", - " bus='Electricity',\n", - " size=1000,\n", - " effects_per_flow_hour={'costs': electricity_price + 0.5, 'CO2': 0.3},\n", - " )\n", - " ],\n", - " ),\n", - " fx.Sink(\n", - " 'GridSell',\n", - " inputs=[fx.Flow('P_el', bus='Electricity', size=1000, effects_per_flow_hour=-(electricity_price - 0.5))],\n", - " ),\n", - " # Demands\n", - " fx.Sink('HeatDemand', inputs=[fx.Flow('Q_th', bus='Heat', size=1, fixed_relative_profile=heat_demand)]),\n", - " fx.Sink(\n", - " 'ElecDemand', inputs=[fx.Flow('P_el', bus='Electricity', size=1, fixed_relative_profile=electricity_demand)]\n", - " ),\n", - " )\n", - "\n", - " return fs\n", - "\n", - "\n", - "flow_system = build_system(timesteps, heat_demand, electricity_demand, electricity_price, gas_price)\n", - "print(f'System: {len(timesteps)} timesteps')" + "fig.add_trace(go.Scatter(x=timesteps[:672], y=heat_demand.values[:672], name='Heat Demand'), row=1, col=1)\n", + 
"fig.add_trace(go.Scatter(x=timesteps[:672], y=electricity_price.values[:672], name='Electricity Price'), row=2, col=1)\n", + "\n", + "fig.update_layout(height=400, title='First Week of Data')\n", + "fig.update_yaxes(title_text='Heat Demand [MW]', row=1, col=1)\n", + "fig.update_yaxes(title_text='El. Price [€/MWh]', row=2, col=1)\n", + "fig.show()" ] }, { "cell_type": "markdown", - "id": "8", + "id": "6", "metadata": {}, "source": [ "## Technique 1: Resampling\n", @@ -231,13 +112,13 @@ { "cell_type": "code", "execution_count": null, - "id": "9", + "id": "7", "metadata": {}, "outputs": [], "source": [ "solver = fx.solvers.HighsSolver(mip_gap=0.01)\n", "\n", - "# Resample from 15min to 4h resolution\n", + "# Resample from 15-min to 4h resolution\n", "fs_resampled = flow_system.transform.resample('4h')\n", "\n", "reduction = (1 - len(fs_resampled.timesteps) / len(flow_system.timesteps)) * 100\n", @@ -247,7 +128,7 @@ { "cell_type": "code", "execution_count": null, - "id": "10", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -261,7 +142,7 @@ }, { "cell_type": "markdown", - "id": "11", + "id": "9", "metadata": {}, "source": [ "## Technique 2: Two-Stage Optimization\n", @@ -273,7 +154,7 @@ { "cell_type": "code", "execution_count": null, - "id": "12", + "id": "10", "metadata": {}, "outputs": [], "source": [ @@ -292,7 +173,7 @@ { "cell_type": "code", "execution_count": null, - "id": "13", + "id": "11", "metadata": {}, "outputs": [], "source": [ @@ -309,7 +190,7 @@ }, { "cell_type": "markdown", - "id": "14", + "id": "12", "metadata": {}, "source": [ "## Technique 3: Full Optimization (Baseline)\n", @@ -320,7 +201,7 @@ { "cell_type": "code", "execution_count": null, - "id": "15", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -334,7 +215,7 @@ }, { "cell_type": "markdown", - "id": "16", + "id": "14", "metadata": {}, "source": [ "## Compare Results" @@ -343,7 +224,7 @@ { "cell_type": "code", "execution_count": null, - "id": "17", + "id": "15", "metadata": {}, "outputs": [], "source": [ @@ -395,7 +276,7 @@ }, { "cell_type": "markdown", - "id": "18", + "id": "16", "metadata": {}, "source": [ "## Visual Comparison: Heat Balance" @@ -404,7 +285,7 @@ { "cell_type": "code", "execution_count": null, - "id": "19", + "id": "17", "metadata": {}, "outputs": [], "source": [ @@ -415,7 +296,7 @@ { "cell_type": "code", "execution_count": null, - "id": "20", + "id": "18", "metadata": {}, "outputs": [], "source": [ @@ -425,7 +306,7 @@ }, { "cell_type": "markdown", - "id": "21", + "id": "19", "metadata": {}, "source": [ "### Energy Flow Sankey (Full Optimization)\n", @@ -436,7 +317,7 @@ { "cell_type": "code", "execution_count": null, - "id": "22", + "id": "20", "metadata": {}, "outputs": [], "source": [ @@ -445,7 +326,7 @@ }, { "cell_type": "markdown", - "id": "23", + "id": "21", "metadata": {}, "source": [ "## When to Use Each Technique\n", @@ -485,7 +366,7 @@ }, { "cell_type": "markdown", - "id": "24", + "id": "22", "metadata": {}, "source": [ "## Summary\n", @@ -506,7 +387,8 @@ "\n", "### Next Steps\n", "\n", - "- **[08b-Rolling Horizon](08b-rolling-horizon.ipynb)**: For operational problems without investment decisions, decompose time into sequential segments\n", + "- **[08b-Rolling Horizon](08b-rolling-horizon.ipynb)**: For operational problems, decompose time into sequential segments\n", + "- **[08c-Clustering](08c-clustering.ipynb)**: Use typical periods with the `tsam` package\n", "\n", "### Further Reading\n", "\n", @@ -515,25 +397,7 @@ ] } ], - "metadata": { - "kernelspec": { - 
"display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.11" - } - }, + "metadata": {}, "nbformat": 4, "nbformat_minor": 5 } diff --git a/docs/notebooks/08b-rolling-horizon.ipynb b/docs/notebooks/08b-rolling-horizon.ipynb index e9eae6d4f..e43da8f2c 100644 --- a/docs/notebooks/08b-rolling-horizon.ipynb +++ b/docs/notebooks/08b-rolling-horizon.ipynb @@ -51,9 +51,9 @@ "id": "3", "metadata": {}, "source": [ - "## Load Time Series Data\n", + "## Load the FlowSystem\n", "\n", - "We use real-world district heating data at 15-minute resolution (two weeks):" + "We use a pre-built operational district heating system with real-world data (two weeks at 15-min resolution):" ] }, { @@ -63,119 +63,27 @@ "metadata": {}, "outputs": [], "source": [ - "# Load time series data (15-min resolution)\n", - "data = pd.read_csv('data/Zeitreihen2020.csv', index_col=0, parse_dates=True).sort_index()\n", - "data = data['2020-01-01':'2020-01-14 23:45:00'] # Two weeks\n", - "data.index.name = 'time' # Rename index for consistency\n", + "from pathlib import Path\n", "\n", - "timesteps = data.index\n", + "# Generate example data if not present (for local development)\n", + "data_file = Path('data/operational_system.nc4')\n", + "if not data_file.exists():\n", + " from data.generate_example_systems import create_operational_system\n", "\n", - "# Extract profiles\n", - "electricity_demand = data['P_Netz/MW'].to_numpy()\n", - "heat_demand = data['Q_Netz/MW'].to_numpy()\n", - "electricity_price = data['Strompr.€/MWh'].to_numpy()\n", - "gas_price = data['Gaspr.€/MWh'].to_numpy()\n", + " fs = create_operational_system()\n", + " fs.to_netcdf(data_file)\n", "\n", - "print(\n", - " f'{len(timesteps)} timesteps ({len(timesteps) / 96:.0f} days), heat {heat_demand.min():.0f}-{heat_demand.max():.0f} MW'\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "def build_system(timesteps, heat_demand, electricity_demand, electricity_price, gas_price):\n", - " \"\"\"Build a district heating system with CHP, boiler, and storage.\"\"\"\n", - " fs = fx.FlowSystem(timesteps)\n", - "\n", - " # Effects\n", - "\n", - " # Buses\n", - " fs.add_elements(\n", - " fx.Bus('Electricity'),\n", - " fx.Bus('Heat'),\n", - " fx.Bus('Gas'),\n", - " fx.Bus('Coal'),\n", - " fx.Effect('costs', '€', 'Total Costs', is_standard=True, is_objective=True),\n", - " fx.Effect('CO2', 'kg', 'CO2 Emissions'),\n", - " fx.linear_converters.CHP(\n", - " 'CHP',\n", - " thermal_efficiency=0.58,\n", - " electrical_efficiency=0.22,\n", - " status_parameters=fx.StatusParameters(effects_per_startup=24000),\n", - " electrical_flow=fx.Flow('P_el', bus='Electricity', size=200),\n", - " thermal_flow=fx.Flow('Q_th', bus='Heat', size=200),\n", - " fuel_flow=fx.Flow('Q_fu', bus='Coal', size=288, relative_minimum=87 / 288, previous_flow_rate=100),\n", - " ),\n", - " fx.linear_converters.Boiler(\n", - " 'Boiler',\n", - " thermal_efficiency=0.85,\n", - " thermal_flow=fx.Flow('Q_th', bus='Heat'),\n", - " fuel_flow=fx.Flow(\n", - " 'Q_fu',\n", - " bus='Gas',\n", - " size=95,\n", - " relative_minimum=12 / 95,\n", - " previous_flow_rate=20,\n", - " status_parameters=fx.StatusParameters(effects_per_startup=1000),\n", - " ),\n", - " ),\n", - " 
fx.Storage(\n", - " 'Storage',\n", - " capacity_in_flow_hours=684,\n", - " initial_charge_state=137,\n", - " minimal_final_charge_state=137,\n", - " maximal_final_charge_state=158,\n", - " eta_charge=1,\n", - " eta_discharge=1,\n", - " relative_loss_per_hour=0.001,\n", - " prevent_simultaneous_charge_and_discharge=True,\n", - " charging=fx.Flow('Charge', size=137, bus='Heat'),\n", - " discharging=fx.Flow('Discharge', size=158, bus='Heat'),\n", - " ),\n", - " fx.Source(\n", - " 'GasGrid',\n", - " outputs=[fx.Flow('Q_Gas', bus='Gas', size=1000, effects_per_flow_hour={'costs': gas_price, 'CO2': 0.3})],\n", - " ),\n", - " fx.Source(\n", - " 'CoalSupply',\n", - " outputs=[fx.Flow('Q_Coal', bus='Coal', size=1000, effects_per_flow_hour={'costs': 4.6, 'CO2': 0.3})],\n", - " ),\n", - " fx.Source(\n", - " 'GridBuy',\n", - " outputs=[\n", - " fx.Flow(\n", - " 'P_el',\n", - " bus='Electricity',\n", - " size=1000,\n", - " effects_per_flow_hour={'costs': electricity_price + 0.5, 'CO2': 0.3},\n", - " )\n", - " ],\n", - " ),\n", - " fx.Sink(\n", - " 'GridSell',\n", - " inputs=[fx.Flow('P_el', bus='Electricity', size=1000, effects_per_flow_hour=-(electricity_price - 0.5))],\n", - " ),\n", - " fx.Sink('HeatDemand', inputs=[fx.Flow('Q_th', bus='Heat', size=1, fixed_relative_profile=heat_demand)]),\n", - " fx.Sink(\n", - " 'ElecDemand', inputs=[fx.Flow('P_el', bus='Electricity', size=1, fixed_relative_profile=electricity_demand)]\n", - " ),\n", - " )\n", - "\n", - " return fs\n", - "\n", - "\n", - "flow_system = build_system(timesteps, heat_demand, electricity_demand, electricity_price, gas_price)\n", - "print(f'System: {len(timesteps)} timesteps')" + "# Load the operational system (real data from Zeitreihen2020.csv, two weeks)\n", + "flow_system = fx.FlowSystem.from_netcdf(data_file)\n", + "\n", + "timesteps = flow_system.timesteps\n", + "print(f'Loaded FlowSystem: {len(timesteps)} timesteps ({len(timesteps) / 96:.0f} days at 15-min resolution)')\n", + "print(f'Components: {list(flow_system.components.keys())}')" ] }, { "cell_type": "markdown", - "id": "6", + "id": "5", "metadata": {}, "source": [ "## Full Optimization (Baseline)\n", @@ -186,7 +94,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -202,7 +110,7 @@ }, { "cell_type": "markdown", - "id": "8", + "id": "7", "metadata": {}, "source": [ "## Rolling Horizon Optimization\n", @@ -227,7 +135,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -245,7 +153,7 @@ }, { "cell_type": "markdown", - "id": "10", + "id": "9", "metadata": {}, "source": [ "## Compare Results" @@ -254,7 +162,7 @@ { "cell_type": "code", "execution_count": null, - "id": "11", + "id": "10", "metadata": {}, "outputs": [], "source": [ @@ -276,7 +184,7 @@ }, { "cell_type": "markdown", - "id": "12", + "id": "11", "metadata": {}, "source": [ "## Visualize: Heat Balance Comparison" @@ -285,7 +193,7 @@ { "cell_type": "code", "execution_count": null, - "id": "13", + "id": "12", "metadata": {}, "outputs": [], "source": [ @@ -295,7 +203,7 @@ { "cell_type": "code", "execution_count": null, - "id": "14", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -304,7 +212,7 @@ }, { "cell_type": "markdown", - "id": "15", + "id": "14", "metadata": {}, "source": [ "## Storage State Continuity\n", @@ -315,7 +223,7 @@ { "cell_type": "code", "execution_count": null, - "id": "16", + "id": "15", "metadata": {}, "outputs": [], "source": [ @@ -339,7 +247,7 @@ 
}, { "cell_type": "markdown", - "id": "17", + "id": "16", "metadata": {}, "source": [ "## Inspect Individual Segments\n", @@ -350,7 +258,7 @@ { "cell_type": "code", "execution_count": null, - "id": "18", + "id": "17", "metadata": {}, "outputs": [], "source": [ @@ -363,7 +271,7 @@ }, { "cell_type": "markdown", - "id": "19", + "id": "18", "metadata": {}, "source": [ "## Visualize Segment Overlaps\n", @@ -374,7 +282,7 @@ { "cell_type": "code", "execution_count": null, - "id": "20", + "id": "19", "metadata": {}, "outputs": [], "source": [ @@ -391,7 +299,7 @@ { "cell_type": "code", "execution_count": null, - "id": "21", + "id": "20", "metadata": {}, "outputs": [], "source": [ @@ -403,7 +311,7 @@ }, { "cell_type": "markdown", - "id": "22", + "id": "21", "metadata": {}, "source": [ "## When to Use Rolling Horizon\n", @@ -424,7 +332,7 @@ }, { "cell_type": "markdown", - "id": "23", + "id": "22", "metadata": {}, "source": [ "## API Reference\n", @@ -448,7 +356,7 @@ }, { "cell_type": "markdown", - "id": "24", + "id": "23", "metadata": {}, "source": [ "## Summary\n", @@ -472,17 +380,7 @@ ] } ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.11" - } - }, + "metadata": {}, "nbformat": 4, "nbformat_minor": 5 } diff --git a/docs/notebooks/08c-clustering.ipynb b/docs/notebooks/08c-clustering.ipynb new file mode 100644 index 000000000..cf5b53b53 --- /dev/null +++ b/docs/notebooks/08c-clustering.ipynb @@ -0,0 +1,523 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "# Time Series Clustering with `cluster()`\n", + "\n", + "Accelerate investment optimization using typical periods (clustering).\n", + "\n", + "This notebook demonstrates:\n", + "\n", + "- **Typical periods**: Cluster similar time segments (e.g., days) and solve only representative ones\n", + "- **Weighted costs**: Automatically weight operational costs by cluster occurrence\n", + "- **Two-stage workflow**: Fast sizing with clustering, accurate dispatch at full resolution\n", + "\n", + "!!! 
note \"Requirements\"\n", + " This notebook requires the `tsam` package: `pip install tsam`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "import timeit\n", + "from pathlib import Path\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import plotly.graph_objects as go\n", + "from plotly.subplots import make_subplots\n", + "\n", + "import flixopt as fx\n", + "\n", + "fx.CONFIG.notebook()" + ] + }, + { + "cell_type": "markdown", + "id": "2", + "metadata": {}, + "source": [ + "## Load the FlowSystem\n", + "\n", + "We use a pre-built district heating system with real-world time series data (one month at 15-min resolution):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "# Generate example data if not present\n", + "data_file = Path('data/district_heating_system.nc4')\n", + "if not data_file.exists():\n", + " from data.generate_example_systems import create_district_heating_system\n", + "\n", + " fs = create_district_heating_system()\n", + " fs.to_netcdf(data_file)\n", + "\n", + "# Load the district heating system\n", + "flow_system = fx.FlowSystem.from_netcdf(data_file)\n", + "\n", + "timesteps = flow_system.timesteps\n", + "print(f'Loaded FlowSystem: {len(timesteps)} timesteps ({len(timesteps) / 96:.0f} days at 15-min resolution)')\n", + "print(f'Components: {list(flow_system.components.keys())}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [ + "# Visualize input data\n", + "heat_demand = flow_system.components['HeatDemand'].inputs[0].fixed_relative_profile\n", + "electricity_price = flow_system.components['GridBuy'].outputs[0].effects_per_flow_hour['costs']\n", + "\n", + "fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.1)\n", + "fig.add_trace(go.Scatter(x=timesteps, y=heat_demand.values, name='Heat Demand', line=dict(width=0.5)), row=1, col=1)\n", + "fig.add_trace(\n", + " go.Scatter(x=timesteps, y=electricity_price.values, name='Electricity Price', line=dict(width=0.5)), row=2, col=1\n", + ")\n", + "fig.update_layout(height=400, title='One Month of Input Data')\n", + "fig.update_yaxes(title_text='Heat Demand [MW]', row=1, col=1)\n", + "fig.update_yaxes(title_text='El. Price [€/MWh]', row=2, col=1)\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "5", + "metadata": {}, + "source": [ + "## Method 1: Full Optimization (Baseline)\n", + "\n", + "First, solve the complete problem with all 2976 timesteps:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "solver = fx.solvers.HighsSolver(mip_gap=0.01)\n", + "\n", + "start = timeit.default_timer()\n", + "fs_full = flow_system.copy()\n", + "fs_full.optimize(solver)\n", + "time_full = timeit.default_timer() - start\n", + "\n", + "print(f'Full optimization: {time_full:.1f} seconds')\n", + "print(f'Total cost: {fs_full.solution[\"costs\"].item():,.0f} €')\n", + "print('\\nOptimized sizes:')\n", + "for name, size in fs_full.statistics.sizes.items():\n", + " print(f' {name}: {float(size.item()):.1f}')" + ] + }, + { + "cell_type": "markdown", + "id": "7", + "metadata": {}, + "source": [ + "## Method 2: Clustering with `cluster()`\n", + "\n", + "The `cluster()` method:\n", + "\n", + "1. **Clusters similar days** using the TSAM (Time Series Aggregation Module) package\n", + "2. 
**Reduces timesteps** to only typical periods (e.g., 8 typical days = 768 timesteps)\n", + "3. **Weights costs** by how many original days each typical day represents\n", + "4. **Handles storage** with configurable behavior via `storage_mode`\n", + "\n", + "!!! warning \"Peak Forcing\"\n", + " Always use `time_series_for_high_peaks` to ensure extreme demand days are captured.\n", + " Without this, clustering may miss peak periods, causing undersized components." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "start = timeit.default_timer()\n", + "\n", + "# IMPORTANT: Force inclusion of peak demand periods!\n", + "peak_series = ['HeatDemand(Q_th)|fixed_relative_profile']\n", + "\n", + "# Create reduced FlowSystem with 8 typical days\n", + "fs_clustered = flow_system.transform.cluster(\n", + " n_clusters=8, # 8 typical days\n", + " cluster_duration='1D', # Daily clustering\n", + " time_series_for_high_peaks=peak_series, # Capture peak demand day\n", + ")\n", + "\n", + "time_clustering = timeit.default_timer() - start\n", + "print(f'Clustering time: {time_clustering:.1f} seconds')\n", + "print(f'Reduced: {len(flow_system.timesteps)} → {len(fs_clustered.timesteps)} timesteps')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "# Optimize the reduced system\n", + "start = timeit.default_timer()\n", + "fs_clustered.optimize(solver)\n", + "time_clustered = timeit.default_timer() - start\n", + "\n", + "print(f'Clustered optimization: {time_clustered:.1f} seconds')\n", + "print(f'Total cost: {fs_clustered.solution[\"costs\"].item():,.0f} €')\n", + "print(f'\\nSpeedup vs full: {time_full / (time_clustering + time_clustered):.1f}x')\n", + "print('\\nOptimized sizes:')\n", + "for name, size in fs_clustered.statistics.sizes.items():\n", + " print(f' {name}: {float(size.item()):.1f}')" + ] + }, + { + "cell_type": "markdown", + "id": "10", + "metadata": {}, + "source": [ + "## Understanding the Clustering\n", + "\n", + "The clustering algorithm groups similar days together. Let's inspect the cluster structure:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "# Show clustering info\n", + "info = fs_clustered.clustering\n", + "cs = info.result.cluster_structure\n", + "print('Clustering Configuration:')\n", + "print(f' Number of typical periods: {cs.n_clusters}')\n", + "print(f' Timesteps per period: {cs.timesteps_per_cluster}')\n", + "print(f' Total reduced timesteps: {cs.n_clusters * cs.timesteps_per_cluster}')\n", + "print(f' Cluster order (first 10 days): {cs.cluster_order.values[:10]}...')\n", + "\n", + "# Show how many times each cluster appears\n", + "cluster_order = cs.cluster_order.values\n", + "unique, counts = np.unique(cluster_order, return_counts=True)\n", + "print('\\nCluster occurrences:')\n", + "for cluster_id, count in zip(unique, counts, strict=False):\n", + " print(f' Cluster {cluster_id}: {count} days')" + ] + }, + { + "cell_type": "markdown", + "id": "12", + "metadata": {}, + "source": [ + "## Method 3: Two-Stage Workflow (Recommended)\n", + "\n", + "The recommended approach for investment optimization:\n", + "\n", + "1. **Stage 1**: Fast sizing with `cluster()` \n", + "2. **Stage 2**: Fix sizes (with safety margin) and dispatch at full resolution\n", + "\n", + "!!! 
tip \"Safety Margin\"\n", + " Typical periods aggregate similar days, so individual days may have higher demand \n", + " than the typical day. Adding a 5-10% margin ensures feasibility." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [ + "# Stage 1 already done above\n", + "print('Stage 1: Sizing with typical periods')\n", + "print(f' Time: {time_clustering + time_clustered:.1f} seconds')\n", + "print(f' Cost estimate: {fs_clustered.solution[\"costs\"].item():,.0f} €')\n", + "\n", + "# Apply safety margin to sizes\n", + "SAFETY_MARGIN = 1.05 # 5% buffer\n", + "sizes_with_margin = {name: float(size.item()) * SAFETY_MARGIN for name, size in fs_clustered.statistics.sizes.items()}\n", + "print(f'\\nSizes with {(SAFETY_MARGIN - 1) * 100:.0f}% safety margin:')\n", + "for name, size in sizes_with_margin.items():\n", + " original = fs_clustered.statistics.sizes[name].item()\n", + " print(f' {name}: {original:.1f} → {size:.1f}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14", + "metadata": {}, + "outputs": [], + "source": [ + "# Stage 2: Fix sizes and optimize at full resolution\n", + "print('Stage 2: Dispatch at full resolution')\n", + "start = timeit.default_timer()\n", + "\n", + "fs_dispatch = flow_system.transform.fix_sizes(sizes_with_margin)\n", + "fs_dispatch.optimize(solver)\n", + "\n", + "time_dispatch = timeit.default_timer() - start\n", + "print(f' Time: {time_dispatch:.1f} seconds')\n", + "print(f' Actual cost: {fs_dispatch.solution[\"costs\"].item():,.0f} €')\n", + "\n", + "# Total comparison\n", + "total_two_stage = time_clustering + time_clustered + time_dispatch\n", + "print(f'\\nTotal two-stage time: {total_two_stage:.1f} seconds')\n", + "print(f'Speedup vs full: {time_full / total_two_stage:.1f}x')" + ] + }, + { + "cell_type": "markdown", + "id": "15", + "metadata": {}, + "source": [ + "## Compare Results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16", + "metadata": {}, + "outputs": [], + "source": [ + "results = {\n", + " 'Full (baseline)': {\n", + " 'Time [s]': time_full,\n", + " 'Cost [€]': fs_full.solution['costs'].item(),\n", + " 'CHP': fs_full.statistics.sizes['CHP(Q_th)'].item(),\n", + " 'Boiler': fs_full.statistics.sizes['Boiler(Q_th)'].item(),\n", + " 'Storage': fs_full.statistics.sizes['Storage'].item(),\n", + " },\n", + " 'Clustered (8 days)': {\n", + " 'Time [s]': time_clustering + time_clustered,\n", + " 'Cost [€]': fs_clustered.solution['costs'].item(),\n", + " 'CHP': fs_clustered.statistics.sizes['CHP(Q_th)'].item(),\n", + " 'Boiler': fs_clustered.statistics.sizes['Boiler(Q_th)'].item(),\n", + " 'Storage': fs_clustered.statistics.sizes['Storage'].item(),\n", + " },\n", + " 'Two-Stage': {\n", + " 'Time [s]': total_two_stage,\n", + " 'Cost [€]': fs_dispatch.solution['costs'].item(),\n", + " 'CHP': sizes_with_margin['CHP(Q_th)'],\n", + " 'Boiler': sizes_with_margin['Boiler(Q_th)'],\n", + " 'Storage': sizes_with_margin['Storage'],\n", + " },\n", + "}\n", + "\n", + "comparison = pd.DataFrame(results).T\n", + "baseline_cost = comparison.loc['Full (baseline)', 'Cost [€]']\n", + "baseline_time = comparison.loc['Full (baseline)', 'Time [s]']\n", + "comparison['Cost Gap [%]'] = ((comparison['Cost [€]'] - baseline_cost) / abs(baseline_cost) * 100).round(2)\n", + "comparison['Speedup'] = (baseline_time / comparison['Time [s]']).round(1)\n", + "\n", + "comparison.style.format(\n", + " {\n", + " 'Time [s]': '{:.1f}',\n", + " 'Cost [€]': 
'{:,.0f}',\n", + " 'CHP': '{:.1f}',\n", + " 'Boiler': '{:.1f}',\n", + " 'Storage': '{:.0f}',\n", + " 'Cost Gap [%]': '{:.2f}',\n", + " 'Speedup': '{:.1f}x',\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "17", + "metadata": {}, + "source": [ + "## Expand Solution to Full Resolution\n", + "\n", + "Use `expand_solution()` to map the clustered solution back to all original timesteps.\n", + "This repeats the typical period values for all days belonging to that cluster:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18", + "metadata": {}, + "outputs": [], + "source": [ + "# Expand the clustered solution to full resolution\n", + "fs_expanded = fs_clustered.transform.expand_solution()\n", + "\n", + "print(f'Expanded: {len(fs_clustered.timesteps)} → {len(fs_expanded.timesteps)} timesteps')\n", + "print(f'Cost: {fs_expanded.solution[\"costs\"].item():,.0f} €')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19", + "metadata": {}, + "outputs": [], + "source": [ + "# Compare heat balance: Full vs Expanded\n", + "fig = make_subplots(rows=2, cols=1, shared_xaxes=True, subplot_titles=['Full Optimization', 'Expanded from Clustering'])\n", + "\n", + "# Full\n", + "for var in ['CHP(Q_th)', 'Boiler(Q_th)']:\n", + " values = fs_full.solution[f'{var}|flow_rate'].values\n", + " fig.add_trace(go.Scatter(x=fs_full.timesteps, y=values, name=var, legendgroup=var, showlegend=True), row=1, col=1)\n", + "\n", + "# Expanded\n", + "for var in ['CHP(Q_th)', 'Boiler(Q_th)']:\n", + " values = fs_expanded.solution[f'{var}|flow_rate'].values\n", + " fig.add_trace(\n", + " go.Scatter(x=fs_expanded.timesteps, y=values, name=var, legendgroup=var, showlegend=False), row=2, col=1\n", + " )\n", + "\n", + "fig.update_layout(height=500, title='Heat Production Comparison')\n", + "fig.update_yaxes(title_text='MW', row=1, col=1)\n", + "fig.update_yaxes(title_text='MW', row=2, col=1)\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "20", + "metadata": {}, + "source": [ + "## Visualize Clustered Heat Balance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21", + "metadata": {}, + "outputs": [], + "source": [ + "fs_clustered.statistics.plot.storage('Storage')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22", + "metadata": {}, + "outputs": [], + "source": [ + "fs_expanded.statistics.plot.storage('Storage')" + ] + }, + { + "cell_type": "markdown", + "id": "23", + "metadata": {}, + "source": [ + "## API Reference\n", + "\n", + "### `transform.cluster()` Parameters\n", + "\n", + "| Parameter | Type | Description |\n", + "|-----------|------|-------------|\n", + "| `n_clusters` | `int` | Number of typical periods (e.g., 8 typical days) |\n", + "| `cluster_duration` | `str \\| float` | Duration per cluster ('1D', '24h') or hours |\n", + "| `weights` | `dict[str, float]` | Optional weights for time series in clustering |\n", + "| `time_series_for_high_peaks` | `list[str]` | **Essential**: Force inclusion of peak periods |\n", + "| `time_series_for_low_peaks` | `list[str]` | Force inclusion of minimum periods |\n", + "\n", + "### Storage Behavior\n", + "\n", + "Each `Storage` component has a `cluster_storage_mode` parameter that controls how it behaves during clustering:\n", + "\n", + "| Mode | Description |\n", + "|------|-------------|\n", + "| `'intercluster_cyclic'` | Links storage across clusters + yearly cyclic **(default)** |\n", + "| `'intercluster'` | Links storage across clusters, free start/end 
|\n", + "| `'cyclic'` | Each cluster is independent but cyclic (start = end) |\n", + "| `'independent'` | Each cluster is independent, free start/end |\n", + "\n", + "For a detailed comparison of storage modes, see [08c2-clustering-storage-modes](08c2-clustering-storage-modes.ipynb).\n", + "\n", + "### Peak Forcing Format\n", + "\n", + "```python\n", + "time_series_for_high_peaks = ['ComponentName(FlowName)|fixed_relative_profile']\n", + "```\n", + "\n", + "### Recommended Workflow\n", + "\n", + "```python\n", + "# Stage 1: Fast sizing\n", + "fs_sizing = flow_system.transform.cluster(\n", + " n_clusters=8,\n", + " cluster_duration='1D',\n", + " time_series_for_high_peaks=['Demand(Flow)|fixed_relative_profile'],\n", + ")\n", + "fs_sizing.optimize(solver)\n", + "\n", + "# Apply safety margin\n", + "sizes = {k: v.item() * 1.05 for k, v in fs_sizing.statistics.sizes.items()}\n", + "\n", + "# Stage 2: Accurate dispatch\n", + "fs_dispatch = flow_system.transform.fix_sizes(sizes)\n", + "fs_dispatch.optimize(solver)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "24", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "You learned how to:\n", + "\n", + "- Use **`cluster()`** to reduce time series into typical periods\n", + "- Apply **peak forcing** to capture extreme demand days\n", + "- Use **two-stage optimization** for fast yet accurate investment decisions\n", + "- **Expand solutions** back to full resolution with `expand_solution()`\n", + "\n", + "### Key Takeaways\n", + "\n", + "1. **Always use peak forcing** (`time_series_for_high_peaks`) for demand time series\n", + "2. **Add safety margin** (5-10%) when fixing sizes from clustering\n", + "3. **Two-stage is recommended**: clustering for sizing, full resolution for dispatch\n", + "4. **Storage handling** is configurable via `storage_mode`\n", + "\n", + "### Next Steps\n", + "\n", + "- **[08c2-clustering-storage-modes](08c2-clustering-storage-modes.ipynb)**: Compare storage modes using a seasonal storage system\n", + "- **[08d-clustering-multiperiod](08d-clustering-multiperiod.ipynb)**: Clustering with multiple periods and scenarios" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/notebooks/08c2-clustering-storage-modes.ipynb b/docs/notebooks/08c2-clustering-storage-modes.ipynb new file mode 100644 index 000000000..d6201382d --- /dev/null +++ b/docs/notebooks/08c2-clustering-storage-modes.ipynb @@ -0,0 +1,425 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "# Clustering Storage Modes\n", + "\n", + "Compare different storage handling modes when clustering time series.\n", + "\n", + "This notebook demonstrates:\n", + "\n", + "- **Four storage modes**: `independent`, `cyclic`, `intercluster`, `intercluster_cyclic`\n", + "- **Seasonal storage**: Why inter-cluster linking matters for long-term storage\n", + "- **When to use each mode**: Choosing the right mode for your application\n", + "\n", + "!!! note \"Prerequisites\"\n", + " Read [08c-clustering](08c-clustering.ipynb) first for clustering basics." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "import timeit\n", + "from pathlib import Path\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import plotly.graph_objects as go\n", + "from plotly.subplots import make_subplots\n", + "\n", + "import flixopt as fx\n", + "\n", + "fx.CONFIG.notebook()" + ] + }, + { + "cell_type": "markdown", + "id": "2", + "metadata": {}, + "source": [ + "## Load the Seasonal Storage System\n", + "\n", + "We use a solar thermal + seasonal pit storage system with a full year of data.\n", + "This is ideal for demonstrating storage modes because:\n", + "\n", + "- **Solar peaks in summer** when heat demand is low\n", + "- **Heat demand peaks in winter** when solar is minimal\n", + "- **Seasonal storage** bridges this gap by storing summer heat for winter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "# Generate example data if not present\n", + "data_file = Path('data/seasonal_storage_system.nc4')\n", + "if not data_file.exists():\n", + " from data.generate_example_systems import create_seasonal_storage_system\n", + "\n", + " fs = create_seasonal_storage_system()\n", + " fs.to_netcdf(data_file)\n", + "\n", + "# Load the seasonal storage system\n", + "flow_system = fx.FlowSystem.from_netcdf(data_file)\n", + "\n", + "timesteps = flow_system.timesteps\n", + "print(f'Loaded FlowSystem: {len(timesteps)} timesteps ({len(timesteps) / 24:.0f} days)')\n", + "print(f'Components: {list(flow_system.components.keys())}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [ + "# Visualize the seasonal patterns\n", + "solar_profile = flow_system.components['SolarThermal'].outputs[0].fixed_relative_profile\n", + "heat_demand = flow_system.components['HeatDemand'].inputs[0].fixed_relative_profile\n", + "\n", + "# Daily average for clearer visualization\n", + "solar_daily = solar_profile.values.reshape(-1, 24).mean(axis=1)\n", + "demand_daily = heat_demand.values.reshape(-1, 24).mean(axis=1)\n", + "days = np.arange(len(solar_daily))\n", + "\n", + "fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.1)\n", + "fig.add_trace(go.Scatter(x=days, y=solar_daily, name='Solar (daily avg)', fill='tozeroy'), row=1, col=1)\n", + "fig.add_trace(go.Scatter(x=days, y=demand_daily, name='Heat Demand (daily avg)', fill='tozeroy'), row=2, col=1)\n", + "fig.update_layout(height=400, title='Seasonal Mismatch: Solar vs Heat Demand')\n", + "fig.update_xaxes(title_text='Day of Year', row=2, col=1)\n", + "fig.update_yaxes(title_text='Solar Profile', row=1, col=1)\n", + "fig.update_yaxes(title_text='Heat Demand [MW]', row=2, col=1)\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "5", + "metadata": {}, + "source": [ + "## Understanding Storage Modes\n", + "\n", + "When clustering reduces a full year to typical periods (e.g., 12 typical days), we need to\n", + "decide how storage behaves across these periods. Each `Storage` component has a \n", + "`cluster_storage_mode` parameter with four options:\n", + "\n", + "| Mode | Description | Use Case |\n", + "|------|-------------|----------|\n", + "| `'intercluster_cyclic'` | Links storage across clusters + yearly cyclic | **Default**. 
Seasonal storage, yearly optimization |\n", + "| `'intercluster'` | Links storage across clusters, free start/end | Multi-year optimization, flexible boundaries |\n", + "| `'cyclic'` | Each cluster independent, but cyclic (start = end) | Daily storage only, no seasonal effects |\n", + "| `'independent'` | Each cluster independent, free start/end | Fastest solve, ignores long-term storage |\n", + "\n", + "Let's compare them!" + ] + }, + { + "cell_type": "markdown", + "id": "6", + "metadata": {}, + "source": [ + "## Baseline: Full Year Optimization\n", + "\n", + "First, optimize the full system to establish a baseline:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": { + "jupyter": { + "is_executing": true + } + }, + "outputs": [], + "source": [ + "solver = fx.solvers.HighsSolver(mip_gap=0.02)\n", + "\n", + "start = timeit.default_timer()\n", + "fs_full = flow_system.copy()\n", + "fs_full.optimize(solver)\n", + "time_full = timeit.default_timer() - start\n", + "\n", + "print(f'Full optimization: {time_full:.1f} seconds')\n", + "print(f'Total cost: {fs_full.solution[\"costs\"].item():,.0f} EUR')\n", + "print('\\nOptimized sizes:')\n", + "for name, size in fs_full.statistics.sizes.items():\n", + " print(f' {name}: {float(size.item()):.2f}')" + ] + }, + { + "cell_type": "markdown", + "id": "8", + "metadata": {}, + "source": [ + "## Compare Storage Modes\n", + "\n", + "Now let's cluster with each storage mode and compare results.\n", + "We set `cluster_storage_mode` on the Storage component before calling `cluster()`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "# Clustering parameters\n", + "N_CLUSTERS = 12 # 12 typical days for a full year\n", + "CLUSTER_DURATION = '1D'\n", + "PEAK_SERIES = ['HeatDemand(Q_th)|fixed_relative_profile']\n", + "\n", + "# Storage modes to compare\n", + "storage_modes = ['independent', 'cyclic', 'intercluster', 'intercluster_cyclic']\n", + "\n", + "results = {}\n", + "clustered_systems = {}\n", + "\n", + "for mode in storage_modes:\n", + " print(f'\\n--- Mode: {mode} ---')\n", + "\n", + " # Create a copy and set the storage mode\n", + " fs_copy = flow_system.copy()\n", + " fs_copy.components['SeasonalStorage'].cluster_storage_mode = mode\n", + "\n", + " start = timeit.default_timer()\n", + " fs_clustered = fs_copy.transform.cluster(\n", + " n_clusters=N_CLUSTERS,\n", + " cluster_duration=CLUSTER_DURATION,\n", + " time_series_for_high_peaks=PEAK_SERIES,\n", + " )\n", + " time_cluster = timeit.default_timer() - start\n", + "\n", + " start = timeit.default_timer()\n", + " fs_clustered.optimize(solver)\n", + " time_solve = timeit.default_timer() - start\n", + "\n", + " clustered_systems[mode] = fs_clustered\n", + "\n", + " results[mode] = {\n", + " 'Time [s]': time_cluster + time_solve,\n", + " 'Cost [EUR]': fs_clustered.solution['costs'].item(),\n", + " 'Solar [MW]': fs_clustered.statistics.sizes.get('SolarThermal(Q_th)', 0),\n", + " 'Boiler [MW]': fs_clustered.statistics.sizes.get('GasBoiler(Q_th)', 0),\n", + " 'Storage [MWh]': fs_clustered.statistics.sizes.get('SeasonalStorage', 0),\n", + " }\n", + "\n", + " # Handle xarray types\n", + " for key in ['Solar [MW]', 'Boiler [MW]', 'Storage [MWh]']:\n", + " val = results[mode][key]\n", + " results[mode][key] = float(val.item()) if hasattr(val, 'item') else float(val)\n", + "\n", + " print(f' Time: {results[mode][\"Time [s]\"]:.1f}s')\n", + " print(f' Cost: {results[mode][\"Cost [EUR]\"]:,.0f} 
EUR')\n", + " print(f' Storage: {results[mode][\"Storage [MWh]\"]:.0f} MWh')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "# Add full optimization result for comparison\n", + "results['Full (baseline)'] = {\n", + " 'Time [s]': time_full,\n", + " 'Cost [EUR]': fs_full.solution['costs'].item(),\n", + " 'Solar [MW]': float(fs_full.statistics.sizes.get('SolarThermal(Q_th)', 0).item()),\n", + " 'Boiler [MW]': float(fs_full.statistics.sizes.get('GasBoiler(Q_th)', 0).item()),\n", + " 'Storage [MWh]': float(fs_full.statistics.sizes.get('SeasonalStorage', 0).item()),\n", + "}\n", + "\n", + "# Create comparison DataFrame\n", + "comparison = pd.DataFrame(results).T\n", + "baseline_cost = comparison.loc['Full (baseline)', 'Cost [EUR]']\n", + "baseline_time = comparison.loc['Full (baseline)', 'Time [s]']\n", + "comparison['Cost Gap [%]'] = (comparison['Cost [EUR]'] - baseline_cost) / abs(baseline_cost) * 100\n", + "comparison['Speedup'] = baseline_time / comparison['Time [s]']\n", + "\n", + "comparison.style.format(\n", + " {\n", + " 'Time [s]': '{:.1f}',\n", + " 'Cost [EUR]': '{:,.0f}',\n", + " 'Solar [MW]': '{:.1f}',\n", + " 'Boiler [MW]': '{:.1f}',\n", + " 'Storage [MWh]': '{:.0f}',\n", + " 'Cost Gap [%]': '{:+.1f}',\n", + " 'Speedup': '{:.1f}x',\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "11", + "metadata": {}, + "source": [ + "## Visualize Storage Behavior\n", + "\n", + "The key difference between modes is how storage is utilized across the year.\n", + "Let's expand each solution back to full resolution and compare:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "# Expand clustered solutions to full resolution\n", + "expanded_systems = {}\n", + "for mode in storage_modes:\n", + " expanded_systems[mode] = clustered_systems[mode].transform.expand_solution()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [ + "# Plot storage charge state for each mode\n", + "fig = make_subplots(\n", + " rows=len(storage_modes) + 1,\n", + " cols=1,\n", + " shared_xaxes=True,\n", + " vertical_spacing=0.05,\n", + " subplot_titles=['Full Optimization'] + [f'Mode: {m}' for m in storage_modes],\n", + ")\n", + "\n", + "# Full optimization\n", + "soc_full = fs_full.solution['SeasonalStorage|charge_state']\n", + "fig.add_trace(go.Scatter(x=fs_full.timesteps, y=soc_full.values, name='Full', line=dict(width=0.8)), row=1, col=1)\n", + "\n", + "# Expanded clustered solutions\n", + "for i, mode in enumerate(storage_modes, start=2):\n", + " fs_exp = expanded_systems[mode]\n", + " soc = fs_exp.solution['SeasonalStorage|charge_state']\n", + " fig.add_trace(go.Scatter(x=fs_exp.timesteps, y=soc.values, name=mode, line=dict(width=0.8)), row=i, col=1)\n", + "\n", + "fig.update_layout(height=800, title='Storage Charge State by Mode', showlegend=False)\n", + "for i in range(1, len(storage_modes) + 2):\n", + " fig.update_yaxes(title_text='SOC [MWh]', row=i, col=1)\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "14", + "metadata": {}, + "source": [ + "## Interpretation\n", + "\n", + "### `'independent'` Mode\n", + "- Each typical period is solved independently\n", + "- Storage starts and ends at arbitrary states within each cluster\n", + "- **No seasonal storage benefit captured** - storage is only used for daily fluctuations\n", + "- Fastest to solve but 
least accurate for seasonal systems\n", + "\n", + "### `'cyclic'` Mode \n", + "- Each cluster is independent but enforces start = end state\n", + "- Better than independent but still **no cross-season linking**\n", + "- Good for systems where storage only balances within-day variations\n", + "\n", + "### `'intercluster'` Mode\n", + "- Links storage state across the original time series via typical periods\n", + "- **Captures seasonal storage behavior** - summer charging, winter discharging\n", + "- Free start and end states (useful for multi-year optimization)\n", + "\n", + "### `'intercluster_cyclic'` Mode (Default)\n", + "- Inter-cluster linking **plus** yearly cyclic constraint (end = start)\n", + "- **Best for yearly investment optimization** with seasonal storage\n", + "- Ensures the storage cycle is sustainable year after year" + ] + }, + { + "cell_type": "markdown", + "id": "15", + "metadata": {}, + "source": [ + "## When to Use Each Mode\n", + "\n", + "| Your System Has... | Recommended Mode |\n", + "|-------------------|------------------|\n", + "| Seasonal storage (pit, underground) | `'intercluster_cyclic'` |\n", + "| Only daily storage (batteries, hot water tanks) | `'cyclic'` |\n", + "| Multi-year optimization with inter-annual storage | `'intercluster'` |\n", + "| Quick sizing estimate, storage not critical | `'independent'` |\n", + "\n", + "### Setting the Mode\n", + "\n", + "```python\n", + "# Option 1: Set when creating the Storage\n", + "storage = fx.Storage(\n", + " 'SeasonalStorage',\n", + " capacity_in_flow_hours=5000,\n", + " cluster_storage_mode='intercluster_cyclic', # default\n", + " ...\n", + ")\n", + "\n", + "# Option 2: Modify before clustering\n", + "flow_system.components['SeasonalStorage'].cluster_storage_mode = 'cyclic'\n", + "fs_clustered = flow_system.transform.cluster(...)\n", + "```\n", + "\n", + "!!! tip \"Rule of Thumb\"\n", + " Use `'intercluster_cyclic'` (default) unless you have a specific reason not to.\n", + " It provides the most accurate representation of storage behavior in clustered systems." + ] + }, + { + "cell_type": "markdown", + "id": "16", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "You learned how to:\n", + "\n", + "- Use **`cluster_storage_mode`** on Storage components to control behavior in clustering\n", + "- Understand the **difference between modes** and their impact on results\n", + "- Choose the **right mode** for your optimization problem\n", + "\n", + "### Key Takeaways\n", + "\n", + "1. **Seasonal storage requires inter-cluster linking** to capture charging/discharging across seasons\n", + "2. **`'intercluster_cyclic'`** is the default and best for yearly investment optimization\n", + "3. **`'independent'` and `'cyclic'`** are faster but miss long-term storage value\n", + "4. 
**Expand solutions** with `expand_solution()` to visualize storage behavior across the year\n", + "\n", + "### Next Steps\n", + "\n", + "- **[08d-clustering-multiperiod](08d-clustering-multiperiod.ipynb)**: Clustering with multiple periods and scenarios" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/notebooks/08d-clustering-multiperiod.ipynb b/docs/notebooks/08d-clustering-multiperiod.ipynb new file mode 100644 index 000000000..84ff468ea --- /dev/null +++ b/docs/notebooks/08d-clustering-multiperiod.ipynb @@ -0,0 +1,582 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "# Multi-Period Clustering with `cluster()`\n", + "\n", + "Combine time series clustering with multi-period investment optimization.\n", + "\n", + "This notebook demonstrates:\n", + "\n", + "- **Multi-period modeling**: Optimize investments across multiple planning periods (years)\n", + "- **Scenario analysis**: Handle demand uncertainty with weighted scenarios\n", + "- **Clustering per period**: Apply typical-period clustering independently for each period/scenario\n", + "- **Scalability**: Reduce computational complexity for long-horizon planning\n", + "\n", + "!!! note \"Requirements\"\n", + " This notebook requires the `tsam` package: `pip install tsam`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "import timeit\n", + "from pathlib import Path\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import plotly.express as px\n", + "\n", + "import flixopt as fx\n", + "\n", + "fx.CONFIG.notebook()" + ] + }, + { + "cell_type": "markdown", + "id": "2", + "metadata": {}, + "source": [ + "## Load the Multi-Period System\n", + "\n", + "We use a pre-built multi-period heating system with:\n", + "- **3 planning periods** (years 2024, 2025, 2026)\n", + "- **2 scenarios** (high demand 30%, low demand 70%)\n", + "- **2 weeks** at hourly resolution (336 timesteps)\n", + "\n", + "This represents a capacity expansion problem where we optimize component sizes once,\n", + "but operations are simulated across multiple future years and demand scenarios." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "# Generate example data if not present\n", + "data_file = Path('data/multiperiod_system.nc4')\n", + "if not data_file.exists():\n", + " from data.generate_example_systems import create_multiperiod_system\n", + "\n", + " fs = create_multiperiod_system()\n", + " fs.to_netcdf(data_file)\n", + "\n", + "# Load the multi-period system\n", + "flow_system = fx.FlowSystem.from_netcdf(data_file)\n", + "\n", + "print(f'Timesteps: {len(flow_system.timesteps)} ({len(flow_system.timesteps) // 24} days)')\n", + "print(f'Periods: {list(flow_system.periods.values)}')\n", + "print(f'Scenarios: {list(flow_system.scenarios.values)}')\n", + "print(f'Scenario weights: {flow_system.scenario_weights.values}')\n", + "print(f'\\nComponents: {list(flow_system.components.keys())}')" + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": {}, + "source": [ + "## Selecting a Subset with `transform.isel()`\n", + "\n", + "For demonstration purposes, we'll use only the first week of data.\n", + "The `isel()` method (index select) lets you slice FlowSystems by time:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "# Select first week only (168 hours)\n", + "flow_system = flow_system.transform.isel(time=slice(0, 168))\n", + "\n", + "print(f'After isel: {len(flow_system.timesteps)} timesteps ({len(flow_system.timesteps) // 24} days)')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "# Visualize demand scenarios\n", + "heat_demand = flow_system.components['Building'].inputs[0].fixed_relative_profile\n", + "\n", + "fig = px.line(\n", + " heat_demand.to_dataframe('value').reset_index(), x='time', y='value', facet_col='period', facet_row='scenario'\n", + ")\n", + "\n", + "fig.update_layout(\n", + " height=350,\n", + " title='Heat Demand by Scenario (One Week)',\n", + " xaxis_title='Time',\n", + " yaxis_title='Heat Demand [kW]',\n", + ")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "7", + "metadata": {}, + "source": [ + "## Full Optimization (Baseline)\n", + "\n", + "First, solve the complete problem with all timesteps across all periods and scenarios:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "solver = fx.solvers.HighsSolver(mip_gap=0.01)\n", + "\n", + "start = timeit.default_timer()\n", + "fs_full = flow_system.copy()\n", + "fs_full.optimize(solver)\n", + "time_full = timeit.default_timer() - start\n", + "\n", + "print(f'Full optimization: {time_full:.2f} seconds')\n", + "print(f'Total cost (objective): {fs_full.solution[\"objective\"].item():,.0f} €')\n", + "print('\\nOptimized sizes:')\n", + "for name, size in fs_full.statistics.sizes.items():\n", + " print(f' {name}: {size.max().item():.1f}')" + ] + }, + { + "cell_type": "markdown", + "id": "9", + "metadata": {}, + "source": [ + "## Multi-Period Clustering with `cluster()`\n", + "\n", + "When applied to a multi-period system, `cluster()` clusters **each period/scenario combination independently**.\n", + "This is because demand patterns and optimal operations may differ across:\n", + "\n", + "- **Periods**: Different years may have different characteristics\n", + "- **Scenarios**: High vs low demand scenarios need different representative days\n", + "\n", + "The investment 
decisions (sizes) remain consistent across all periods and scenarios,\n", + "while the operational patterns are optimized for each cluster." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "start = timeit.default_timer()\n", + "\n", + "# Force inclusion of peak demand periods\n", + "peak_series = ['Building(Heat)|fixed_relative_profile']\n", + "\n", + "# Cluster to 3 typical days (from 7 days)\n", + "fs_clustered = flow_system.transform.cluster(\n", + " n_clusters=3,\n", + " cluster_duration='1D',\n", + " time_series_for_high_peaks=peak_series,\n", + ")\n", + "\n", + "time_clustering = timeit.default_timer() - start\n", + "\n", + "print(f'Clustering time: {time_clustering:.2f} seconds')\n", + "print(f'Reduced: {len(flow_system.timesteps)} → {len(fs_clustered.timesteps)} timesteps per period')\n", + "print('Total problem reduction: 7 days × 3 periods × 2 scenarios → 3 days × 3 × 2')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "# Optimize the reduced system\n", + "start = timeit.default_timer()\n", + "fs_clustered.optimize(solver)\n", + "time_clustered = timeit.default_timer() - start\n", + "\n", + "print(f'Clustered optimization: {time_clustered:.2f} seconds')\n", + "print(f'Total cost (objective): {fs_clustered.solution[\"objective\"].item():,.0f} €')\n", + "print(f'\\nSpeedup vs full: {time_full / (time_clustering + time_clustered):.1f}x')\n", + "print('\\nOptimized sizes:')\n", + "for name, size in fs_clustered.statistics.sizes.items():\n", + " print(f' {name}: {size.max().item():.1f}')" + ] + }, + { + "cell_type": "markdown", + "id": "12", + "metadata": {}, + "source": [ + "## Visualize Clustering Quality\n", + "\n", + "The `.plot` accessor provides built-in visualizations with automatic faceting by period and scenario:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [ + "# Compare original vs aggregated data - automatically faceted by period and scenario\n", + "fs_clustered.clustering.plot.compare(variables='Building(Heat)|fixed_relative_profile')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14", + "metadata": {}, + "outputs": [], + "source": [ + "# Duration curves show how well the distribution is preserved per period/scenario\n", + "fs_clustered.clustering.plot.compare(\n", + " kind='duration_curve',\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15", + "metadata": {}, + "outputs": [], + "source": [ + "# Heatmap shows cluster assignments - faceted by period and scenario\n", + "fs_clustered.clustering.plot.heatmap()" + ] + }, + { + "cell_type": "markdown", + "id": "16", + "metadata": {}, + "source": [ + "## Understand the Cluster Structure\n", + "\n", + "Let's inspect how days were grouped into clusters:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17", + "metadata": {}, + "outputs": [], + "source": [ + "info = fs_clustered.clustering\n", + "cs = info.result.cluster_structure\n", + "\n", + "print('Clustering Configuration:')\n", + "print(f' Typical periods (clusters): {cs.n_clusters}')\n", + "print(f' Timesteps per cluster: {cs.timesteps_per_cluster}')\n", + "\n", + "# The cluster_order shows which cluster each original day belongs to\n", + "cluster_order = cs.cluster_order.values\n", + "day_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']\n", + 
"\n", + "print('\\nCluster assignments per day:')\n", + "for i, cluster_id in enumerate(cluster_order):\n", + " print(f' {day_names[i]}: Cluster {cluster_id}')\n", + "\n", + "# Cluster occurrences (how many original days each cluster represents)\n", + "unique, counts = np.unique(cluster_order, return_counts=True)\n", + "print('\\nCluster weights (days represented):')\n", + "for cluster_id, count in zip(unique, counts, strict=False):\n", + " print(f' Cluster {cluster_id}: {count} days')" + ] + }, + { + "cell_type": "markdown", + "id": "18", + "metadata": {}, + "source": [ + "## Two-Stage Workflow for Multi-Period\n", + "\n", + "For investment optimization across multiple periods, the recommended workflow is:\n", + "\n", + "1. **Stage 1**: Fast sizing with clustering (reduced timesteps)\n", + "2. **Stage 2**: Fix sizes and run full-resolution dispatch\n", + "\n", + "This gives accurate investment decisions while maintaining computational tractability." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19", + "metadata": {}, + "outputs": [], + "source": [ + "# Stage 1 already done - apply safety margin\n", + "SAFETY_MARGIN = 1.10 # 10% buffer for multi-period uncertainty\n", + "\n", + "sizes_with_margin = {name: size.max().item() * SAFETY_MARGIN for name, size in fs_clustered.statistics.sizes.items()}\n", + "\n", + "print('Stage 1: Sizing with clustering')\n", + "print(f' Time: {time_clustering + time_clustered:.2f} seconds')\n", + "print(f' Cost estimate: {fs_clustered.solution[\"objective\"].item():,.0f} €')\n", + "print(f'\\nSizes with {(SAFETY_MARGIN - 1) * 100:.0f}% safety margin:')\n", + "for name, size in sizes_with_margin.items():\n", + " original = fs_clustered.statistics.sizes[name].max().item()\n", + " print(f' {name}: {original:.1f} → {size:.1f}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20", + "metadata": {}, + "outputs": [], + "source": [ + "# Stage 2: Full resolution dispatch with fixed sizes\n", + "print('Stage 2: Full resolution dispatch')\n", + "start = timeit.default_timer()\n", + "\n", + "fs_dispatch = flow_system.transform.fix_sizes(sizes_with_margin)\n", + "fs_dispatch.optimize(solver)\n", + "\n", + "time_dispatch = timeit.default_timer() - start\n", + "\n", + "print(f' Time: {time_dispatch:.2f} seconds')\n", + "print(f' Actual cost: {fs_dispatch.solution[\"objective\"].item():,.0f} €')\n", + "\n", + "# Total comparison\n", + "total_two_stage = time_clustering + time_clustered + time_dispatch\n", + "print(f'\\nTotal two-stage time: {total_two_stage:.2f} seconds')\n", + "print(f'Speedup vs full: {time_full / total_two_stage:.1f}x')" + ] + }, + { + "cell_type": "markdown", + "id": "21", + "metadata": {}, + "source": [ + "## Compare Results Across Methods" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22", + "metadata": {}, + "outputs": [], + "source": [ + "results = {\n", + " 'Full (baseline)': {\n", + " 'Time [s]': time_full,\n", + " 'Cost [€]': fs_full.solution['objective'].item(),\n", + " 'Boiler': fs_full.statistics.sizes['Boiler(Heat)'].max().item(),\n", + " 'Storage': fs_full.statistics.sizes['ThermalStorage'].max().item(),\n", + " },\n", + " 'Clustered (3 days)': {\n", + " 'Time [s]': time_clustering + time_clustered,\n", + " 'Cost [€]': fs_clustered.solution['objective'].item(),\n", + " 'Boiler': fs_clustered.statistics.sizes['Boiler(Heat)'].max().item(),\n", + " 'Storage': fs_clustered.statistics.sizes['ThermalStorage'].max().item(),\n", + " },\n", + " 'Two-Stage': {\n", + " 'Time 
[s]': total_two_stage,\n", + " 'Cost [€]': fs_dispatch.solution['objective'].item(),\n", + " 'Boiler': sizes_with_margin['Boiler(Heat)'],\n", + " 'Storage': sizes_with_margin['ThermalStorage'],\n", + " },\n", + "}\n", + "\n", + "comparison = pd.DataFrame(results).T\n", + "baseline_cost = comparison.loc['Full (baseline)', 'Cost [€]']\n", + "baseline_time = comparison.loc['Full (baseline)', 'Time [s]']\n", + "comparison['Cost Gap [%]'] = ((comparison['Cost [€]'] - baseline_cost) / abs(baseline_cost) * 100).round(2)\n", + "comparison['Speedup'] = (baseline_time / comparison['Time [s]']).round(1)\n", + "\n", + "comparison.style.format(\n", + " {\n", + " 'Time [s]': '{:.2f}',\n", + " 'Cost [€]': '{:,.0f}',\n", + " 'Boiler': '{:.1f}',\n", + " 'Storage': '{:.0f}',\n", + " 'Cost Gap [%]': '{:.2f}',\n", + " 'Speedup': '{:.1f}x',\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "23", + "metadata": {}, + "source": [ + "## Visualize Optimization Results\n", + "\n", + "Use the built-in statistics plotting to compare results across periods and scenarios:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24", + "metadata": {}, + "outputs": [], + "source": [ + "# Plot flow rates with automatic faceting by period and scenario\n", + "fs_full.statistics.plot.flows(component='Boiler')" + ] + }, + { + "cell_type": "markdown", + "id": "25", + "metadata": {}, + "source": [ + "## Expand Clustered Solution to Full Resolution\n", + "\n", + "Use `expand_solution()` to map the clustered results back to all original timesteps:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "26", + "metadata": {}, + "outputs": [], + "source": [ + "# Expand the clustered solution\n", + "fs_expanded = fs_clustered.transform.expand_solution()\n", + "\n", + "print(f'Expanded: {len(fs_clustered.timesteps)} → {len(fs_expanded.timesteps)} timesteps')\n", + "print(f'Cost (objective): {fs_expanded.solution[\"objective\"].item():,.0f} €')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27", + "metadata": {}, + "outputs": [], + "source": [ + "# Compare expanded solution - shows the repeated cluster patterns\n", + "fs_expanded.statistics.plot.flows(component='Boiler')" + ] + }, + { + "cell_type": "markdown", + "id": "28", + "metadata": {}, + "source": [ + "## Key Considerations for Multi-Period Clustering\n", + "\n", + "### 1. Independent Clustering per Period/Scenario\n", + "\n", + "Each period and scenario combination is clustered independently because:\n", + "- Demand patterns may differ across years (growth, seasonality)\n", + "- Scenarios represent different futures that shouldn't be mixed\n", + "- Investment decisions must be robust across all combinations\n", + "\n", + "### 2. Safety Margins\n", + "\n", + "Multi-period systems often warrant larger safety margins (10-15%) because:\n", + "- More uncertainty across multiple years\n", + "- Investments made once must work for all periods\n", + "- Scenario weights may not perfectly represent actual outcomes\n", + "\n", + "### 3. 
Computational Benefits\n", + "\n", + "Clustering becomes more valuable as problem size grows:\n", + "\n", + "| Scenario | Full Problem | With Clustering |\n", + "|----------|--------------|----------------|\n", + "| 1 period, 1 scenario, 365 days | 8,760 timesteps | ~730 (10 typical days) |\n", + "| 3 periods, 2 scenarios, 365 days | 52,560 timesteps | ~4,380 |\n", + "| 10 periods, 3 scenarios, 365 days | 262,800 timesteps | ~21,900 |\n", + "\n", + "The speedup factor increases with problem size." + ] + }, + { + "cell_type": "markdown", + "id": "29", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "You learned how to:\n", + "\n", + "- Load **multi-period systems** with periods and scenarios\n", + "- Use **`transform.isel()`** to select time subsets\n", + "- Apply **`cluster()`** to multi-dimensional FlowSystems\n", + "- **Visualize clustering** with the `.plot` accessor (compare, duration curves, heatmaps)\n", + "- Use the **two-stage workflow** for robust investment optimization\n", + "- **Expand solutions** back to full resolution with `expand_solution()`\n", + "\n", + "### Key Takeaways\n", + "\n", + "1. **Clustering is applied per period/scenario**: Each combination gets independent typical periods\n", + "2. **Investments are shared**: Component sizes are optimized once across all periods/scenarios\n", + "3. **Use larger safety margins**: Multi-period uncertainty warrants 10-15% buffers\n", + "4. **Two-stage is recommended**: Fast sizing with clustering, accurate dispatch at full resolution\n", + "5. **Built-in plotting**: Use `.plot` accessor for automatic faceting by period/scenario\n", + "\n", + "### API Reference\n", + "\n", + "```python\n", + "# Load multi-period system\n", + "fs = fx.FlowSystem.from_netcdf('multiperiod_system.nc4')\n", + "\n", + "# Select time subset (optional)\n", + "fs = fs.transform.isel(time=slice(0, 168)) # First 168 timesteps\n", + "\n", + "# Cluster (applies per period/scenario)\n", + "fs_clustered = fs.transform.cluster(\n", + " n_clusters=10,\n", + " cluster_duration='1D',\n", + " time_series_for_high_peaks=['Demand(Flow)|fixed_relative_profile'],\n", + ")\n", + "\n", + "# Visualize clustering quality\n", + "fs_clustered.clustering.plot.compare(variable='Demand(Flow)|profile')\n", + "fs_clustered.clustering.plot.heatmap()\n", + "\n", + "# Two-stage workflow\n", + "fs_clustered.optimize(solver)\n", + "sizes = {k: v.max().item() * 1.10 for k, v in fs_clustered.statistics.sizes.items()}\n", + "fs_dispatch = fs.transform.fix_sizes(sizes)\n", + "fs_dispatch.optimize(solver)\n", + "\n", + "# Visualize results\n", + "fs_dispatch.statistics.plot.flows(component='Boiler')\n", + "```" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/notebooks/08e-clustering-internals.ipynb b/docs/notebooks/08e-clustering-internals.ipynb new file mode 100644 index 000000000..a0ac80ca7 --- /dev/null +++ b/docs/notebooks/08e-clustering-internals.ipynb @@ -0,0 +1,310 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "# Clustering Internals\n", + "\n", + "Understanding the data structures and visualization tools behind time series clustering.\n", + "\n", + "This notebook demonstrates:\n", + "\n", + "- **Data structures**: `Clustering`, `ClusterResult`, and `ClusterStructure`\n", + "- **Plot accessor**: Built-in 
visualizations via `.plot`\n", + "- **Data expansion**: Using `expand_data()` to map aggregated data back to original timesteps\n", + "\n", + "!!! note \"Prerequisites\"\n", + " This notebook assumes familiarity with [08c-clustering](08c-clustering.ipynb)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import flixopt as fx\n", + "\n", + "fx.CONFIG.notebook()\n", + "\n", + "# Load the district heating system\n", + "data_file = Path('data/district_heating_system.nc4')\n", + "if not data_file.exists():\n", + " from data.generate_example_systems import create_district_heating_system\n", + "\n", + " fs = create_district_heating_system()\n", + " fs.to_netcdf(data_file)\n", + "\n", + "flow_system = fx.FlowSystem.from_netcdf(data_file)" + ] + }, + { + "cell_type": "markdown", + "id": "2", + "metadata": {}, + "source": [ + "## Clustering Metadata\n", + "\n", + "After calling `cluster()`, metadata is stored in `fs.clustering`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "fs_clustered = flow_system.transform.cluster(\n", + " n_clusters=8,\n", + " cluster_duration='1D',\n", + " time_series_for_high_peaks=['HeatDemand(Q_th)|fixed_relative_profile'],\n", + ")\n", + "\n", + "fs_clustered.clustering" + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": {}, + "source": [ + "The `Clustering` contains:\n", + "- **`result`**: A `ClusterResult` with timestep mapping and weights\n", + "- **`result.cluster_structure`**: A `ClusterStructure` with cluster assignments" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "fs_clustered.clustering.result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "fs_clustered.clustering.result.cluster_structure" + ] + }, + { + "cell_type": "markdown", + "id": "7", + "metadata": {}, + "source": [ + "## Visualizing Clustering\n", + "\n", + "The `.plot` accessor provides built-in visualizations for understanding clustering results." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "# Compare original vs aggregated data as timeseries\n", + "# By default, plots all time-varying variables\n", + "fs_clustered.clustering.plot.compare()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "# Compare specific variables only\n", + "fs_clustered.clustering.plot.compare(variables='HeatDemand(Q_th)|fixed_relative_profile')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "# Duration curves show how well the aggregated data preserves the distribution\n", + "fs_clustered.clustering.plot.compare(kind='duration_curve').data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "# View typical period profiles for each cluster\n", + "# Each line represents a cluster's representative day\n", + "fs_clustered.clustering.plot.clusters(variables='HeatDemand(Q_th)|fixed_relative_profile')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "# Heatmap shows cluster assignments for each original period\n", + "fs_clustered.clustering.plot.heatmap()" + ] + }, + { + "cell_type": "markdown", + "id": "13", + "metadata": {}, + "source": [ + "## Expanding Aggregated Data\n", + "\n", + "The `ClusterResult.expand_data()` method maps aggregated data back to original timesteps.\n", + "This is useful for comparing clustering results before optimization:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14", + "metadata": {}, + "outputs": [], + "source": [ + "# Get original and aggregated data\n", + "result = fs_clustered.clustering.result\n", + "original = result.original_data['HeatDemand(Q_th)|fixed_relative_profile']\n", + "aggregated = result.aggregated_data['HeatDemand(Q_th)|fixed_relative_profile']\n", + "\n", + "# Expand aggregated data back to original timesteps\n", + "expanded = result.expand_data(aggregated)\n", + "\n", + "print(f'Original: {len(original.time)} timesteps')\n", + "print(f'Aggregated: {len(aggregated.time)} timesteps')\n", + "print(f'Expanded: {len(expanded.time)} timesteps')" + ] + }, + { + "cell_type": "markdown", + "id": "15", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "| Class | Purpose |\n", + "|-------|--------|\n", + "| `Clustering` | Stored on `fs.clustering` after `cluster()` |\n", + "| `ClusterResult` | Contains timestep mapping, weights, and `expand_data()` method |\n", + "| `ClusterStructure` | Maps original periods to clusters |\n", + "\n", + "### Plot Accessor Methods\n", + "\n", + "| Method | Description |\n", + "|--------|-------------|\n", + "| `plot.compare()` | Compare original vs aggregated data (timeseries) |\n", + "| `plot.compare(kind='duration_curve')` | Compare as duration curves |\n", + "| `plot.clusters()` | View each cluster's profile |\n", + "| `plot.heatmap()` | Visualize cluster assignments |\n", + "\n", + "### Key Parameters\n", + "\n", + "```python\n", + "# Compare with options\n", + "clustering.plot.compare(\n", + " variables='Demand|profile', # Single variable, list, or None (all)\n", + " kind='timeseries', # 'timeseries' or 'duration_curve'\n", + " select={'scenario': 'Base'}, # xarray-style selection\n", + " colors='viridis', # Colorscale name, list, or dict\n", + " 
facet_col='period', # Facet by period if present\n", + " facet_row='scenario', # Facet by scenario if present\n", + ")\n", + "\n", + "# Heatmap shows cluster assignments (no variable needed)\n", + "clustering.plot.heatmap()\n", + "\n", + "# Expand aggregated data to original timesteps\n", + "result = clustering.result\n", + "expanded = result.expand_data(aggregated_data)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "16", + "metadata": {}, + "source": [ + "## Cluster Weights\n", + "\n", + "Each representative timestep has a weight equal to the number of original periods it represents.\n", + "This ensures operational costs scale correctly:\n", + "\n", + "$$\\text{Objective} = \\sum_{t \\in \\text{typical}} w_t \\cdot c_t$$\n", + "\n", + "The weights sum to the original timestep count:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17", + "metadata": {}, + "outputs": [], + "source": [ + "print(f'Sum of weights: {fs_clustered.cluster_weight.sum().item():.0f}')\n", + "print(f'Original timesteps: {len(flow_system.timesteps)}')" + ] + }, + { + "cell_type": "markdown", + "id": "18", + "metadata": {}, + "source": [ + "## Solution Expansion\n", + "\n", + "After optimization, `expand_solution()` maps results back to full resolution:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19", + "metadata": {}, + "outputs": [], + "source": [ + "solver = fx.solvers.HighsSolver(mip_gap=0.01, log_to_console=False)\n", + "fs_clustered.optimize(solver)\n", + "\n", + "fs_expanded = fs_clustered.transform.expand_solution()\n", + "\n", + "print(f'Clustered: {len(fs_clustered.timesteps)} timesteps')\n", + "print(f'Expanded: {len(fs_expanded.timesteps)} timesteps')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/notebooks/data/generate_example_systems.py b/docs/notebooks/data/generate_example_systems.py index 556463302..c8e81167f 100644 --- a/docs/notebooks/data/generate_example_systems.py +++ b/docs/notebooks/data/generate_example_systems.py @@ -1,9 +1,12 @@ -"""Generate example FlowSystem files for the plotting notebook. +"""Generate example FlowSystem files for notebooks. -This script creates three FlowSystems of varying complexity: +This script creates FlowSystems of varying complexity: 1. simple_system - Basic heat system (boiler + storage + sink) 2. complex_system - Multi-carrier with multiple effects and piecewise efficiency 3. multiperiod_system - System with periods and scenarios +4. district_heating_system - Real-world district heating data with investments (1 month) +5. operational_system - Real-world district heating for operational planning (2 weeks, no investments) +6. seasonal_storage_system - Solar thermal + seasonal pit storage (full year, 8760h) Run this script to regenerate the example data files. 
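+
+Example (a sketch of how the notebooks consume these files; the import path
+and file name mirror the notebook code, adjust the path to the notebooks'
+data directory as needed):
+
+    from data.generate_example_systems import create_multiperiod_system
+
+    import flixopt as fx
+
+    fs = create_multiperiod_system()
+    fs.to_netcdf('multiperiod_system.nc4')
+    fs = fx.FlowSystem.from_netcdf('multiperiod_system.nc4')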
""" @@ -18,9 +21,11 @@ # Output directory (same as this script) try: OUTPUT_DIR = Path(__file__).parent + DATA_DIR = Path(__file__).parent # Zeitreihen2020.csv is in the same directory except NameError: # Running in notebook context (e.g., mkdocs-jupyter) OUTPUT_DIR = Path('docs/notebooks/data') + DATA_DIR = Path('docs/notebooks/data') def create_simple_system() -> fx.FlowSystem: @@ -229,6 +234,362 @@ def create_complex_system() -> fx.FlowSystem: return fs +def create_district_heating_system() -> fx.FlowSystem: + """Create a district heating system using real-world data. + + Based on Zeitreihen2020.csv data: + - One month of data at 15-minute resolution + - CHP, boiler, storage, and grid connections + - Investment optimization for sizing + + Used by: 08a-aggregation, 08b-rolling-horizon, 08c-clustering notebooks + """ + # Load real data + data_path = DATA_DIR / 'Zeitreihen2020.csv' + data = pd.read_csv(data_path, index_col=0, parse_dates=True).sort_index() + data = data['2020-01-01':'2020-01-31 23:45:00'] # One month + data.index.name = 'time' + + timesteps = data.index + electricity_demand = data['P_Netz/MW'].to_numpy() + heat_demand = data['Q_Netz/MW'].to_numpy() + electricity_price = data['Strompr.€/MWh'].to_numpy() + gas_price = data['Gaspr.€/MWh'].to_numpy() + + fs = fx.FlowSystem(timesteps) + fs.add_elements( + # Buses + fx.Bus('Electricity'), + fx.Bus('Heat'), + fx.Bus('Gas'), + fx.Bus('Coal'), + # Effects + fx.Effect('costs', '€', 'Total Costs', is_standard=True, is_objective=True), + fx.Effect('CO2', 'kg', 'CO2 Emissions'), + # CHP unit with investment + fx.linear_converters.CHP( + 'CHP', + thermal_efficiency=0.58, + electrical_efficiency=0.22, + electrical_flow=fx.Flow('P_el', bus='Electricity', size=200), + thermal_flow=fx.Flow( + 'Q_th', + bus='Heat', + size=fx.InvestParameters( + minimum_size=100, + maximum_size=300, + effects_of_investment_per_size={'costs': 10}, + ), + relative_minimum=0.3, + status_parameters=fx.StatusParameters(), + ), + fuel_flow=fx.Flow('Q_fu', bus='Coal'), + ), + # Gas Boiler with investment + fx.linear_converters.Boiler( + 'Boiler', + thermal_efficiency=0.85, + thermal_flow=fx.Flow( + 'Q_th', + bus='Heat', + size=fx.InvestParameters( + minimum_size=0, + maximum_size=150, + effects_of_investment_per_size={'costs': 5}, + ), + relative_minimum=0.1, + status_parameters=fx.StatusParameters(), + ), + fuel_flow=fx.Flow('Q_fu', bus='Gas'), + ), + # Thermal Storage with investment + fx.Storage( + 'Storage', + capacity_in_flow_hours=fx.InvestParameters( + minimum_size=0, + maximum_size=1000, + effects_of_investment_per_size={'costs': 0.5}, + ), + initial_charge_state=0, + eta_charge=1, + eta_discharge=1, + relative_loss_per_hour=0.001, + charging=fx.Flow('Charge', size=137, bus='Heat'), + discharging=fx.Flow('Discharge', size=158, bus='Heat'), + ), + # Fuel sources + fx.Source( + 'GasGrid', + outputs=[fx.Flow('Q_Gas', bus='Gas', size=1000, effects_per_flow_hour={'costs': gas_price, 'CO2': 0.3})], + ), + fx.Source( + 'CoalSupply', + outputs=[fx.Flow('Q_Coal', bus='Coal', size=1000, effects_per_flow_hour={'costs': 4.6, 'CO2': 0.3})], + ), + # Electricity grid + fx.Source( + 'GridBuy', + outputs=[ + fx.Flow( + 'P_el', + bus='Electricity', + size=1000, + effects_per_flow_hour={'costs': electricity_price + 0.5, 'CO2': 0.3}, + ) + ], + ), + fx.Sink( + 'GridSell', + inputs=[fx.Flow('P_el', bus='Electricity', size=1000, effects_per_flow_hour=-(electricity_price - 0.5))], + ), + # Demands + fx.Sink('HeatDemand', inputs=[fx.Flow('Q_th', bus='Heat', size=1, 
fixed_relative_profile=heat_demand)]), + fx.Sink( + 'ElecDemand', inputs=[fx.Flow('P_el', bus='Electricity', size=1, fixed_relative_profile=electricity_demand)] + ), + ) + return fs + + +def create_operational_system() -> fx.FlowSystem: + """Create an operational district heating system (no investments). + + Based on Zeitreihen2020.csv data (two weeks): + - CHP with startup costs + - Boiler with startup costs + - Storage with fixed capacity + - No investment parameters (for rolling horizon optimization) + + Used by: 08b-rolling-horizon notebook + """ + # Load real data + data_path = DATA_DIR / 'Zeitreihen2020.csv' + data = pd.read_csv(data_path, index_col=0, parse_dates=True).sort_index() + data = data['2020-01-01':'2020-01-14 23:45:00'] # Two weeks + data.index.name = 'time' + + timesteps = data.index + electricity_demand = data['P_Netz/MW'].to_numpy() + heat_demand = data['Q_Netz/MW'].to_numpy() + electricity_price = data['Strompr.€/MWh'].to_numpy() + gas_price = data['Gaspr.€/MWh'].to_numpy() + + fs = fx.FlowSystem(timesteps) + fs.add_elements( + fx.Bus('Electricity'), + fx.Bus('Heat'), + fx.Bus('Gas'), + fx.Bus('Coal'), + fx.Effect('costs', '€', 'Total Costs', is_standard=True, is_objective=True), + fx.Effect('CO2', 'kg', 'CO2 Emissions'), + # CHP with startup costs + fx.linear_converters.CHP( + 'CHP', + thermal_efficiency=0.58, + electrical_efficiency=0.22, + status_parameters=fx.StatusParameters(effects_per_startup=24000), + electrical_flow=fx.Flow('P_el', bus='Electricity', size=200), + thermal_flow=fx.Flow('Q_th', bus='Heat', size=200), + fuel_flow=fx.Flow('Q_fu', bus='Coal', size=288, relative_minimum=87 / 288, previous_flow_rate=100), + ), + # Boiler with startup costs + fx.linear_converters.Boiler( + 'Boiler', + thermal_efficiency=0.85, + thermal_flow=fx.Flow('Q_th', bus='Heat'), + fuel_flow=fx.Flow( + 'Q_fu', + bus='Gas', + size=95, + relative_minimum=12 / 95, + previous_flow_rate=20, + status_parameters=fx.StatusParameters(effects_per_startup=1000), + ), + ), + # Storage with fixed capacity + fx.Storage( + 'Storage', + capacity_in_flow_hours=684, + initial_charge_state=137, + minimal_final_charge_state=137, + maximal_final_charge_state=158, + eta_charge=1, + eta_discharge=1, + relative_loss_per_hour=0.001, + prevent_simultaneous_charge_and_discharge=True, + charging=fx.Flow('Charge', size=137, bus='Heat'), + discharging=fx.Flow('Discharge', size=158, bus='Heat'), + ), + fx.Source( + 'GasGrid', + outputs=[fx.Flow('Q_Gas', bus='Gas', size=1000, effects_per_flow_hour={'costs': gas_price, 'CO2': 0.3})], + ), + fx.Source( + 'CoalSupply', + outputs=[fx.Flow('Q_Coal', bus='Coal', size=1000, effects_per_flow_hour={'costs': 4.6, 'CO2': 0.3})], + ), + fx.Source( + 'GridBuy', + outputs=[ + fx.Flow( + 'P_el', + bus='Electricity', + size=1000, + effects_per_flow_hour={'costs': electricity_price + 0.5, 'CO2': 0.3}, + ) + ], + ), + fx.Sink( + 'GridSell', + inputs=[fx.Flow('P_el', bus='Electricity', size=1000, effects_per_flow_hour=-(electricity_price - 0.5))], + ), + fx.Sink('HeatDemand', inputs=[fx.Flow('Q_th', bus='Heat', size=1, fixed_relative_profile=heat_demand)]), + fx.Sink( + 'ElecDemand', inputs=[fx.Flow('P_el', bus='Electricity', size=1, fixed_relative_profile=electricity_demand)] + ), + ) + return fs + + +def create_seasonal_storage_system() -> fx.FlowSystem: + """Create a district heating system with solar thermal and seasonal storage. 
+ + Demonstrates seasonal storage value with: + - Full year at hourly resolution (8760 timesteps) + - Solar thermal: high in summer, low in winter + - Heat demand: high in winter, low in summer + - Large seasonal pit storage (bridges seasons) + - Gas boiler backup + + This system clearly shows the value of inter-cluster storage linking: + - Summer: excess solar heat stored in pit + - Winter: stored heat reduces gas consumption + + Used by: 08c-clustering, 08c2-clustering-storage-modes notebooks + """ + # Full year, hourly + timesteps = pd.date_range('2024-01-01', periods=8760, freq='h') + hours = np.arange(8760) + hour_of_day = hours % 24 + day_of_year = hours // 24 + + np.random.seed(42) + + # --- Solar irradiance profile --- + # Seasonal variation: peaks in summer (day ~180), low in winter + seasonal_solar = 0.5 + 0.5 * np.cos(2 * np.pi * (day_of_year - 172) / 365) # Peak around June 21 + + # Daily variation: peaks at noon + daily_solar = np.maximum(0, np.cos(2 * np.pi * (hour_of_day - 12) / 24)) + + # Combine and scale (MW of solar thermal potential per MW installed) + solar_profile = seasonal_solar * daily_solar + solar_profile = solar_profile * (0.8 + 0.2 * np.random.random(8760)) # Add some variation + solar_profile = np.clip(solar_profile, 0, 1) + + # --- Heat demand profile --- + # Seasonal: high in winter, low in summer + seasonal_demand = 0.6 + 0.4 * np.cos(2 * np.pi * day_of_year / 365) # Peak Jan 1 + + # Daily: higher during day, lower at night + daily_demand = 0.7 + 0.3 * np.sin(2 * np.pi * (hour_of_day - 6) / 24) + + # Combine and scale to ~5 MW peak + heat_demand = 5 * seasonal_demand * daily_demand + heat_demand = heat_demand * (0.9 + 0.2 * np.random.random(8760)) # Add variation + heat_demand = np.clip(heat_demand, 0.5, 6) # MW + + # --- Gas price (slight seasonal variation) --- + gas_price = 40 + 10 * np.cos(2 * np.pi * day_of_year / 365) # €/MWh, higher in winter + + fs = fx.FlowSystem(timesteps) + fs.add_carriers( + fx.Carrier('gas', '#3498db', 'MW'), + fx.Carrier('heat', '#e74c3c', 'MW'), + ) + fs.add_elements( + # Buses + fx.Bus('Gas', carrier='gas'), + fx.Bus('Heat', carrier='heat'), + # Effects + fx.Effect('costs', '€', 'Total Costs', is_standard=True, is_objective=True), + fx.Effect('CO2', 'kg', 'CO2 Emissions'), + # Solar thermal collector (investment) - profile includes 70% collector efficiency + # Costs annualized for single-year analysis + fx.Source( + 'SolarThermal', + outputs=[ + fx.Flow( + 'Q_th', + bus='Heat', + size=fx.InvestParameters( + minimum_size=0, + maximum_size=20, # MW peak + effects_of_investment_per_size={'costs': 15000}, # €/MW (annualized) + ), + fixed_relative_profile=solar_profile * 0.7, # 70% collector efficiency + ) + ], + ), + # Gas boiler (backup) + fx.linear_converters.Boiler( + 'GasBoiler', + thermal_efficiency=0.90, + thermal_flow=fx.Flow( + 'Q_th', + bus='Heat', + size=fx.InvestParameters( + minimum_size=0, + maximum_size=8, # MW + effects_of_investment_per_size={'costs': 20000}, # €/MW (annualized) + ), + ), + fuel_flow=fx.Flow('Q_fu', bus='Gas'), + ), + # Gas supply (higher price makes solar+storage more attractive) + fx.Source( + 'GasGrid', + outputs=[ + fx.Flow( + 'Q_gas', + bus='Gas', + size=20, + effects_per_flow_hour={'costs': gas_price * 1.5, 'CO2': 0.2}, # €/MWh + ) + ], + ), + # Seasonal pit storage (large capacity for seasonal shifting) + fx.Storage( + 'SeasonalStorage', + capacity_in_flow_hours=fx.InvestParameters( + minimum_size=0, + maximum_size=5000, # MWh - large for seasonal storage + 
effects_of_investment_per_size={'costs': 20}, # €/MWh (pit storage is cheap) + ), + initial_charge_state='equals_final', # Yearly cyclic + eta_charge=0.95, + eta_discharge=0.95, + relative_loss_per_hour=0.0001, # Very low losses for pit storage + charging=fx.Flow( + 'Charge', + bus='Heat', + size=fx.InvestParameters(maximum_size=10, effects_of_investment_per_size={'costs': 5000}), + ), + discharging=fx.Flow( + 'Discharge', + bus='Heat', + size=fx.InvestParameters(maximum_size=10, effects_of_investment_per_size={'costs': 5000}), + ), + ), + # Heat demand + fx.Sink( + 'HeatDemand', + inputs=[fx.Flow('Q_th', bus='Heat', size=1, fixed_relative_profile=heat_demand)], + ), + ) + return fs + + def create_multiperiod_system() -> fx.FlowSystem: """Create a system with multiple periods and scenarios. @@ -236,10 +597,13 @@ def create_multiperiod_system() -> fx.FlowSystem: - 3 planning periods (years 2024, 2025, 2026) - 2 scenarios (high demand, low demand) - Each period: 48 hours (2 days representative) + Each period: 336 hours (2 weeks) - suitable for clustering demonstrations. + Use transform.sisel() to select subsets if needed. """ - timesteps = pd.date_range('2024-01-01', periods=48, freq='h') - hour_of_day = np.arange(48) % 24 + n_hours = 336 # 2 weeks + timesteps = pd.date_range('2024-01-01', periods=n_hours, freq='h') + hour_of_day = np.arange(n_hours) % 24 + day_of_week = (np.arange(n_hours) // 24) % 7 # Period definitions (years) periods = pd.Index([2024, 2025, 2026], name='period') @@ -248,19 +612,21 @@ def create_multiperiod_system() -> fx.FlowSystem: scenarios = pd.Index(['high_demand', 'low_demand'], name='scenario') scenario_weights = np.array([0.3, 0.7]) - # Base demand pattern (hourly) + # Base demand pattern (hourly) with daily and weekly variation base_pattern = np.where((hour_of_day >= 7) & (hour_of_day <= 18), 80.0, 35.0) + weekend_factor = np.where(day_of_week >= 5, 0.6, 1.0) + base_pattern = base_pattern * weekend_factor # Scenario-specific scaling np.random.seed(42) - high_demand = base_pattern * 1.2 + np.random.normal(0, 5, 48) - low_demand = base_pattern * 0.85 + np.random.normal(0, 3, 48) + high_demand = base_pattern * 1.3 + np.random.normal(0, 8, n_hours) + low_demand = base_pattern * 0.8 + np.random.normal(0, 5, n_hours) # Create DataFrame with scenario columns heat_demand = pd.DataFrame( { - 'high_demand': np.clip(high_demand, 20, 120), - 'low_demand': np.clip(low_demand, 15, 90), + 'high_demand': np.clip(high_demand, 20, 150), + 'low_demand': np.clip(low_demand, 15, 100), }, index=timesteps, ) @@ -322,6 +688,9 @@ def main(): ('simple_system', create_simple_system), ('complex_system', create_complex_system), ('multiperiod_system', create_multiperiod_system), + ('district_heating_system', create_district_heating_system), + ('operational_system', create_operational_system), + ('seasonal_storage_system', create_seasonal_storage_system), ] for name, create_func in systems: diff --git a/flixopt/__init__.py b/flixopt/__init__.py index 1e3fee5bd..73784f2cd 100644 --- a/flixopt/__init__.py +++ b/flixopt/__init__.py @@ -13,9 +13,8 @@ __version__ = '0.0.0.dev0' # Import commonly used classes and functions -from . import linear_converters, plotting, results, solvers +from . 
import clustering, linear_converters, plotting, results, solvers from .carrier import Carrier, CarrierContainer -from .clustering import ClusteringParameters from .components import ( LinearConverter, Sink, @@ -30,8 +29,9 @@ from .elements import Bus, Flow from .flow_system import FlowSystem from .interface import InvestParameters, Piece, Piecewise, PiecewiseConversion, PiecewiseEffects, StatusParameters -from .optimization import ClusteredOptimization, Optimization, SegmentedOptimization +from .optimization import Optimization, SegmentedOptimization from .plot_result import PlotResult +from .structure import TimeSeriesWeights __all__ = [ 'TimeSeriesData', @@ -50,7 +50,6 @@ 'Transmission', 'FlowSystem', 'Optimization', - 'ClusteredOptimization', 'SegmentedOptimization', 'InvestParameters', 'StatusParameters', @@ -58,8 +57,9 @@ 'Piecewise', 'PiecewiseConversion', 'PiecewiseEffects', - 'ClusteringParameters', 'PlotResult', + 'TimeSeriesWeights', + 'clustering', 'plotting', 'results', 'linear_converters', diff --git a/flixopt/clustering.py b/flixopt/clustering.py deleted file mode 100644 index d392167a1..000000000 --- a/flixopt/clustering.py +++ /dev/null @@ -1,431 +0,0 @@ -""" -This module contains the Clustering functionality for the flixopt framework. -Through this, clustering TimeSeriesData is possible. -""" - -from __future__ import annotations - -import copy -import logging -import timeit -from typing import TYPE_CHECKING - -import numpy as np - -try: - import tsam.timeseriesaggregation as tsam - - TSAM_AVAILABLE = True -except ImportError: - TSAM_AVAILABLE = False - -from .color_processing import process_colors -from .components import Storage -from .config import CONFIG -from .plot_result import PlotResult -from .structure import ( - FlowSystemModel, - Submodel, -) - -if TYPE_CHECKING: - import linopy - import pandas as pd - - from .core import Scalar, TimeSeriesData - from .elements import Component - from .flow_system import FlowSystem - -logger = logging.getLogger('flixopt') - - -class Clustering: - """ - Clustering organizing class - """ - - def __init__( - self, - original_data: pd.DataFrame, - hours_per_time_step: Scalar, - hours_per_period: Scalar, - nr_of_periods: int = 8, - weights: dict[str, float] | None = None, - time_series_for_high_peaks: list[str] | None = None, - time_series_for_low_peaks: list[str] | None = None, - ): - """ - Args: - original_data: The original data to aggregate - hours_per_time_step: The duration of each timestep in hours. - hours_per_period: The duration of each period in hours. - nr_of_periods: The number of typical periods to use in the aggregation. - weights: The weights for aggregation. If None, all time series are equally weighted. - time_series_for_high_peaks: List of time series to use for explicitly selecting periods with high values. - time_series_for_low_peaks: List of time series to use for explicitly selecting periods with low values. - """ - if not TSAM_AVAILABLE: - raise ImportError( - "The 'tsam' package is required for clustering functionality. Install it with 'pip install tsam'." 
- ) - self.original_data = copy.deepcopy(original_data) - self.hours_per_time_step = hours_per_time_step - self.hours_per_period = hours_per_period - self.nr_of_periods = nr_of_periods - self.nr_of_time_steps = len(self.original_data.index) - self.weights = weights or {} - self.time_series_for_high_peaks = time_series_for_high_peaks or [] - self.time_series_for_low_peaks = time_series_for_low_peaks or [] - - self.aggregated_data: pd.DataFrame | None = None - self.clustering_duration_seconds = None - self.tsam: tsam.TimeSeriesAggregation | None = None - - def cluster(self) -> None: - """ - Durchführung der Zeitreihenaggregation - """ - start_time = timeit.default_timer() - # Erstellen des aggregation objects - self.tsam = tsam.TimeSeriesAggregation( - self.original_data, - noTypicalPeriods=self.nr_of_periods, - hoursPerPeriod=self.hours_per_period, - resolution=self.hours_per_time_step, - clusterMethod='k_means', - extremePeriodMethod='new_cluster_center' - if self.use_extreme_periods - else 'None', # Wenn Extremperioden eingebunden werden sollen, nutze die Methode 'new_cluster_center' aus tsam - weightDict={name: weight for name, weight in self.weights.items() if name in self.original_data.columns}, - addPeakMax=self.time_series_for_high_peaks, - addPeakMin=self.time_series_for_low_peaks, - ) - - self.tsam.createTypicalPeriods() # Ausführen der Aggregation/Clustering - self.aggregated_data = self.tsam.predictOriginalData() - - self.clustering_duration_seconds = timeit.default_timer() - start_time # Zeit messen: - if logger.isEnabledFor(logging.INFO): - logger.info(self.describe_clusters()) - - def describe_clusters(self) -> str: - description = {} - for cluster in self.get_cluster_indices().keys(): - description[cluster] = [ - str(indexVector[0]) + '...' + str(indexVector[-1]) - for indexVector in self.get_cluster_indices()[cluster] - ] - - if self.use_extreme_periods: - # Zeitreihe rauslöschen: - extreme_periods = self.tsam.extremePeriods.copy() - for key in extreme_periods: - del extreme_periods[key]['profile'] - else: - extreme_periods = {} - - return ( - f'{"":#^80}\n' - f'{" Clustering ":#^80}\n' - f'periods_order:\n' - f'{self.tsam.clusterOrder}\n' - f'clusterPeriodNoOccur:\n' - f'{self.tsam.clusterPeriodNoOccur}\n' - f'index_vectors_of_clusters:\n' - f'{description}\n' - f'{"":#^80}\n' - f'extreme_periods:\n' - f'{extreme_periods}\n' - f'{"":#^80}' - ) - - @property - def use_extreme_periods(self): - return self.time_series_for_high_peaks or self.time_series_for_low_peaks - - def plot(self, colormap: str | None = None, show: bool | None = None) -> PlotResult: - """Plot original vs aggregated data comparison. - - Visualizes the original time series (dashed lines) overlaid with - the aggregated/clustered time series (solid lines) for comparison. - - Args: - colormap: Colorscale name for the time series colors. - Defaults to CONFIG.Plotting.default_qualitative_colorscale. - show: Whether to display the figure. - Defaults to CONFIG.Plotting.default_show. - - Returns: - PlotResult containing the comparison figure and underlying data. 
- - Examples: - >>> clustering.cluster() - >>> clustering.plot() - >>> clustering.plot(colormap='Set2', show=False).to_html('clustering.html') - """ - import plotly.express as px - import xarray as xr - - df_org = self.original_data.copy().rename( - columns={col: f'Original - {col}' for col in self.original_data.columns} - ) - df_agg = self.aggregated_data.copy().rename( - columns={col: f'Aggregated - {col}' for col in self.aggregated_data.columns} - ) - colors = list( - process_colors(colormap or CONFIG.Plotting.default_qualitative_colorscale, list(df_org.columns)).values() - ) - - # Create line plot for original data (dashed) - index_name = df_org.index.name or 'index' - df_org_long = df_org.reset_index().melt(id_vars=index_name, var_name='variable', value_name='value') - fig = px.line(df_org_long, x=index_name, y='value', color='variable', color_discrete_sequence=colors) - for trace in fig.data: - trace.update(line=dict(dash='dash')) - - # Add aggregated data (solid lines) - df_agg_long = df_agg.reset_index().melt(id_vars=index_name, var_name='variable', value_name='value') - fig2 = px.line(df_agg_long, x=index_name, y='value', color='variable', color_discrete_sequence=colors) - for trace in fig2.data: - fig.add_trace(trace) - - fig.update_layout( - title='Original vs Aggregated Data (original = ---)', - xaxis_title='Time in h', - yaxis_title='Value', - ) - - # Build xarray Dataset with both original and aggregated data - data = xr.Dataset( - { - 'original': self.original_data.to_xarray().to_array(dim='variable'), - 'aggregated': self.aggregated_data.to_xarray().to_array(dim='variable'), - } - ) - result = PlotResult(data=data, figure=fig) - - if show is None: - show = CONFIG.Plotting.default_show - if show: - result.show() - - return result - - def get_cluster_indices(self) -> dict[str, list[np.ndarray]]: - """ - Generates a dictionary that maps each cluster to a list of index vectors representing the time steps - assigned to that cluster for each period. - - Returns: - dict: {cluster_0: [index_vector_3, index_vector_7, ...], - cluster_1: [index_vector_1], - ...} - """ - clusters = self.tsam.clusterPeriodNoOccur.keys() - index_vectors = {cluster: [] for cluster in clusters} - - period_length = len(self.tsam.stepIdx) - total_steps = len(self.tsam.timeSeries) - - for period, cluster_id in enumerate(self.tsam.clusterOrder): - start_idx = period * period_length - end_idx = np.min([start_idx + period_length, total_steps]) - index_vectors[cluster_id].append(np.arange(start_idx, end_idx)) - - return index_vectors - - def get_equation_indices(self, skip_first_index_of_period: bool = True) -> tuple[np.ndarray, np.ndarray]: - """ - Generates pairs of indices for the equations by comparing index vectors of the same cluster. - If `skip_first_index_of_period` is True, the first index of each period is skipped. - - Args: - skip_first_index_of_period (bool): Whether to include or skip the first index of each period. - - Returns: - tuple[np.ndarray, np.ndarray]: Two arrays of indices. 
- """ - idx_var1 = [] - idx_var2 = [] - - # Iterate through cluster index vectors - for index_vectors in self.get_cluster_indices().values(): - if len(index_vectors) <= 1: # Only proceed if cluster has more than one period - continue - - # Process the first vector, optionally skip first index - first_vector = index_vectors[0][1:] if skip_first_index_of_period else index_vectors[0] - - # Compare first vector to others in the cluster - for other_vector in index_vectors[1:]: - if skip_first_index_of_period: - other_vector = other_vector[1:] - - # Compare elements up to the minimum length of both vectors - min_len = min(len(first_vector), len(other_vector)) - idx_var1.extend(first_vector[:min_len]) - idx_var2.extend(other_vector[:min_len]) - - # Convert lists to numpy arrays - return np.array(idx_var1), np.array(idx_var2) - - -class ClusteringParameters: - def __init__( - self, - hours_per_period: float, - nr_of_periods: int, - fix_storage_flows: bool, - aggregate_data_and_fix_non_binary_vars: bool, - percentage_of_period_freedom: float = 0, - penalty_of_period_freedom: float = 0, - time_series_for_high_peaks: list[TimeSeriesData] | None = None, - time_series_for_low_peaks: list[TimeSeriesData] | None = None, - ): - """ - Initializes clustering parameters for time series data - - Args: - hours_per_period: Duration of each period in hours. - nr_of_periods: Number of typical periods to use in the aggregation. - fix_storage_flows: Whether to aggregate storage flows (load/unload); if other flows - are fixed, fixing storage flows is usually not required. - aggregate_data_and_fix_non_binary_vars: Whether to aggregate all time series data, which allows to fix all time series variables (like flow_rate), - or only fix binary variables. If False non time_series data is changed!! If True, the mathematical Problem - is simplified even further. - percentage_of_period_freedom: Specifies the maximum percentage (0–100) of binary values within each period - that can deviate as "free variables", chosen by the solver (default is 0). - This allows binary variables to be 'partly equated' between aggregated periods. - penalty_of_period_freedom: The penalty associated with each "free variable"; defaults to 0. Added to Penalty - time_series_for_high_peaks: List of TimeSeriesData to use for explicitly selecting periods with high values. - time_series_for_low_peaks: List of TimeSeriesData to use for explicitly selecting periods with low values. 
- """ - self.hours_per_period = hours_per_period - self.nr_of_periods = nr_of_periods - self.fix_storage_flows = fix_storage_flows - self.aggregate_data_and_fix_non_binary_vars = aggregate_data_and_fix_non_binary_vars - self.percentage_of_period_freedom = percentage_of_period_freedom - self.penalty_of_period_freedom = penalty_of_period_freedom - self.time_series_for_high_peaks: list[TimeSeriesData] = time_series_for_high_peaks or [] - self.time_series_for_low_peaks: list[TimeSeriesData] = time_series_for_low_peaks or [] - - @property - def use_extreme_periods(self): - return self.time_series_for_high_peaks or self.time_series_for_low_peaks - - @property - def labels_for_high_peaks(self) -> list[str]: - return [ts.name for ts in self.time_series_for_high_peaks] - - @property - def labels_for_low_peaks(self) -> list[str]: - return [ts.name for ts in self.time_series_for_low_peaks] - - @property - def use_low_peaks(self) -> bool: - return bool(self.time_series_for_low_peaks) - - -class ClusteringModel(Submodel): - """The ClusteringModel holds equations and variables related to the Clustering of a FlowSystem. - It creates Equations that equates indices of variables, and introduces penalties related to binary variables, that - escape the equation to their related binaries in other periods""" - - def __init__( - self, - model: FlowSystemModel, - clustering_parameters: ClusteringParameters, - flow_system: FlowSystem, - clustering_data: Clustering, - components_to_clusterize: list[Component] | None, - ): - """ - Modeling-Element for "index-equating"-equations - """ - super().__init__(model, label_of_element='Clustering', label_of_model='Clustering') - self.flow_system = flow_system - self.clustering_parameters = clustering_parameters - self.clustering_data = clustering_data - self.components_to_clusterize = components_to_clusterize - - def do_modeling(self): - if not self.components_to_clusterize: - components = self.flow_system.components.values() - else: - components = [component for component in self.components_to_clusterize] - - indices = self.clustering_data.get_equation_indices(skip_first_index_of_period=True) - - time_variables: set[str] = { - name for name in self._model.variables if 'time' in self._model.variables[name].dims - } - binary_variables: set[str] = set(self._model.variables.binaries) - binary_time_variables: set[str] = time_variables & binary_variables - - for component in components: - if isinstance(component, Storage) and not self.clustering_parameters.fix_storage_flows: - continue # Fix Nothing in The Storage - - all_variables_of_component = set(component.submodel.variables) - - if self.clustering_parameters.aggregate_data_and_fix_non_binary_vars: - relevant_variables = component.submodel.variables[all_variables_of_component & time_variables] - else: - relevant_variables = component.submodel.variables[all_variables_of_component & binary_time_variables] - for variable in relevant_variables: - self._equate_indices(component.submodel.variables[variable], indices) - - penalty = self.clustering_parameters.penalty_of_period_freedom - if (self.clustering_parameters.percentage_of_period_freedom > 0) and penalty != 0: - from .effects import PENALTY_EFFECT_LABEL - - for variable_name in self.variables_direct: - variable = self.variables_direct[variable_name] - # Sum correction variables over all dimensions to get periodic penalty contribution - self._model.effects.add_share_to_effects( - name='Aggregation', - expressions={PENALTY_EFFECT_LABEL: (variable * penalty).sum('time')}, - 
target='periodic', - ) - - def _equate_indices(self, variable: linopy.Variable, indices: tuple[np.ndarray, np.ndarray]) -> None: - assert len(indices[0]) == len(indices[1]), 'The length of the indices must match!!' - length = len(indices[0]) - - # Gleichung: - # eq1: x(p1,t) - x(p3,t) = 0 # wobei p1 und p3 im gleichen Cluster sind und t = 0..N_p - con = self.add_constraints( - variable.isel(time=indices[0]) - variable.isel(time=indices[1]) == 0, - short_name=f'equate_indices|{variable.name}', - ) - - # Korrektur: (bisher nur für Binärvariablen:) - if ( - variable.name in self._model.variables.binaries - and self.clustering_parameters.percentage_of_period_freedom > 0 - ): - sel = variable.isel(time=indices[0]) - coords = {d: sel.indexes[d] for d in sel.dims} - var_k1 = self.add_variables(binary=True, coords=coords, short_name=f'correction1|{variable.name}') - - var_k0 = self.add_variables(binary=True, coords=coords, short_name=f'correction0|{variable.name}') - - # equation extends ... - # --> On(p3) can be 0/1 independent of On(p1,t)! - # eq1: On(p1,t) - On(p3,t) + K1(p3,t) - K0(p3,t) = 0 - # --> correction On(p3) can be: - # On(p1,t) = 1 -> On(p3) can be 0 -> K0=1 (,K1=0) - # On(p1,t) = 0 -> On(p3) can be 1 -> K1=1 (,K0=1) - con.lhs += 1 * var_k1 - 1 * var_k0 - - # interlock var_k1 and var_K2: - # eq: var_k0(t)+var_k1(t) <= 1 - self.add_constraints(var_k0 + var_k1 <= 1, short_name=f'lock_k0_and_k1|{variable.name}') - - # Begrenzung der Korrektur-Anzahl: - # eq: sum(K) <= n_Corr_max - limit = int(np.floor(self.clustering_parameters.percentage_of_period_freedom / 100 * length)) - self.add_constraints( - var_k0.sum(dim='time') + var_k1.sum(dim='time') <= limit, - short_name=f'limit_corrections|{variable.name}', - ) diff --git a/flixopt/clustering/__init__.py b/flixopt/clustering/__init__.py new file mode 100644 index 000000000..a5446a524 --- /dev/null +++ b/flixopt/clustering/__init__.py @@ -0,0 +1,42 @@ +""" +Time Series Aggregation Module for flixopt. + +This module provides data structures for time series clustering/aggregation. + +Key classes: +- ClusterResult: Universal result container for clustering +- ClusterStructure: Hierarchical structure info for storage inter-cluster linking +- Clustering: Stored on FlowSystem after clustering + +Example usage: + + # Cluster a FlowSystem to reduce timesteps + fs_clustered = flow_system.transform.cluster( + n_clusters=8, + cluster_duration='1D', + time_series_for_high_peaks=['Demand|fixed_relative_profile'], + ) + + # Access clustering metadata + info = fs_clustered.clustering + print(f'Number of clusters: {info.result.cluster_structure.n_clusters}') + + # Expand solution back to full resolution + fs_expanded = fs_clustered.transform.expand_solution() +""" + +from .base import ( + Clustering, + ClusterResult, + ClusterStructure, + create_cluster_structure_from_mapping, +) + +__all__ = [ + # Core classes + 'ClusterResult', + 'Clustering', + 'ClusterStructure', + # Utilities + 'create_cluster_structure_from_mapping', +] diff --git a/flixopt/clustering/base.py b/flixopt/clustering/base.py new file mode 100644 index 000000000..81c2b0bfc --- /dev/null +++ b/flixopt/clustering/base.py @@ -0,0 +1,1046 @@ +""" +Base classes and data structures for time series aggregation (clustering). + +This module provides an abstraction layer for time series aggregation that +supports multiple backends (TSAM, manual/external, etc.). 
+ +Terminology: +- "cluster" = a group of similar time chunks (e.g., similar days grouped together) +- "typical period" = a representative time chunk for a cluster (TSAM terminology) +- "cluster duration" = the length of each time chunk (e.g., 24h for daily clustering) + +Note: This is separate from the model's "period" dimension (years/months) and +"scenario" dimension. The aggregation operates on the 'time' dimension. + +All data structures use xarray for consistent handling of coordinates. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any + +import numpy as np +import xarray as xr + +if TYPE_CHECKING: + from ..color_processing import ColorType + from ..flow_system import FlowSystem + from ..plot_result import PlotResult + from ..statistics_accessor import SelectType + + +@dataclass +class ClusterStructure: + """Structure information for inter-cluster storage linking. + + This class captures the hierarchical structure of time series clustering, + which is needed for proper storage state-of-charge tracking across + typical periods when using cluster(). + + Note: "original_period" here refers to the original time chunks before + clustering (e.g., 365 original days), NOT the model's "period" dimension + (years/months). Each original time chunk gets assigned to a cluster. + + Attributes: + cluster_order: Maps each original time chunk index to its cluster ID. + dims: [original_period] for simple case, or + [original_period, period, scenario] for multi-period/scenario systems. + Values are cluster indices (0 to n_clusters-1). + cluster_occurrences: Count of how many original time chunks each cluster represents. + dims: [cluster] for simple case, or [cluster, period, scenario] for multi-dim. + n_clusters: Number of distinct clusters (typical periods). + timesteps_per_cluster: Number of timesteps in each cluster (e.g., 24 for daily). 
+ + Example: + For 365 days clustered into 8 typical days: + - cluster_order: shape (365,), values 0-7 indicating which cluster each day belongs to + - cluster_occurrences: shape (8,), e.g., [45, 46, 46, 46, 46, 45, 45, 46] + - n_clusters: 8 + - timesteps_per_cluster: 24 (for hourly data) + + For multi-scenario (e.g., 2 scenarios): + - cluster_order: shape (365, 2) with dims [original_period, scenario] + - cluster_occurrences: shape (8, 2) with dims [cluster, scenario] + """ + + cluster_order: xr.DataArray + cluster_occurrences: xr.DataArray + n_clusters: int | xr.DataArray + timesteps_per_cluster: int + + def __post_init__(self): + """Validate and ensure proper DataArray formatting.""" + # Ensure cluster_order is a DataArray with proper dims + if not isinstance(self.cluster_order, xr.DataArray): + self.cluster_order = xr.DataArray(self.cluster_order, dims=['original_period'], name='cluster_order') + elif self.cluster_order.name is None: + self.cluster_order = self.cluster_order.rename('cluster_order') + + # Ensure cluster_occurrences is a DataArray with proper dims + if not isinstance(self.cluster_occurrences, xr.DataArray): + self.cluster_occurrences = xr.DataArray( + self.cluster_occurrences, dims=['cluster'], name='cluster_occurrences' + ) + elif self.cluster_occurrences.name is None: + self.cluster_occurrences = self.cluster_occurrences.rename('cluster_occurrences') + + def __repr__(self) -> str: + n_clusters = ( + int(self.n_clusters) if isinstance(self.n_clusters, (int, np.integer)) else int(self.n_clusters.values) + ) + occ = [int(self.cluster_occurrences.sel(cluster=c).values) for c in range(n_clusters)] + return ( + f'ClusterStructure(\n' + f' {self.n_original_periods} original periods → {n_clusters} clusters\n' + f' timesteps_per_cluster={self.timesteps_per_cluster}\n' + f' occurrences={occ}\n' + f')' + ) + + @property + def n_original_periods(self) -> int: + """Number of original periods (before clustering).""" + return len(self.cluster_order.coords['original_period']) + + @property + def has_multi_dims(self) -> bool: + """Check if cluster_order has period/scenario dimensions.""" + return 'period' in self.cluster_order.dims or 'scenario' in self.cluster_order.dims + + def get_cluster_order_for_slice(self, period: str | None = None, scenario: str | None = None) -> np.ndarray: + """Get cluster_order for a specific (period, scenario) combination. + + Args: + period: Period label (None if no period dimension). + scenario: Scenario label (None if no scenario dimension). + + Returns: + 1D numpy array of cluster indices for the specified slice. + """ + order = self.cluster_order + if 'period' in order.dims and period is not None: + order = order.sel(period=period) + if 'scenario' in order.dims and scenario is not None: + order = order.sel(scenario=scenario) + return order.values.astype(int) + + def get_cluster_occurrences_for_slice( + self, period: str | None = None, scenario: str | None = None + ) -> dict[int, int]: + """Get cluster occurrence counts for a specific (period, scenario) combination. + + Args: + period: Period label (None if no period dimension). + scenario: Scenario label (None if no scenario dimension). + + Returns: + Dict mapping cluster ID to occurrence count. 
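+
+        Example:
+            Illustrative sketch, reusing the 8-cluster daily example from the class
+            docstring (the variable name ``structure`` is assumed):
+
+                structure.get_cluster_occurrences_for_slice(scenario='Base Case')
+                # -> {0: 45, 1: 46, 2: 46, 3: 46, 4: 46, 5: 45, 6: 45, 7: 46}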
+ """ + occurrences = self.cluster_occurrences + if 'period' in occurrences.dims and period is not None: + occurrences = occurrences.sel(period=period) + if 'scenario' in occurrences.dims and scenario is not None: + occurrences = occurrences.sel(scenario=scenario) + return {int(c): int(occurrences.sel(cluster=c).values) for c in occurrences.coords['cluster'].values} + + def get_cluster_weight_per_timestep(self) -> xr.DataArray: + """Get weight for each representative timestep. + + Returns an array where each timestep's weight equals the number of + original periods its cluster represents. + + Returns: + DataArray with dims [time] or [time, period, scenario]. + """ + # Expand cluster_occurrences to timesteps + n_clusters = ( + int(self.n_clusters) if isinstance(self.n_clusters, (int, np.integer)) else int(self.n_clusters.values) + ) + + # Get occurrence for each cluster, then repeat for timesteps + weights_list = [] + for c in range(n_clusters): + occ = self.cluster_occurrences.sel(cluster=c) + weights_list.append(np.repeat(float(occ.values), self.timesteps_per_cluster)) + + weights = np.concatenate(weights_list) + return xr.DataArray( + weights, + dims=['time'], + coords={'time': np.arange(len(weights))}, + name='cluster_weight', + ) + + def plot(self, show: bool | None = None): + """Plot cluster assignment visualization. + + Shows which cluster each original period belongs to, and the + number of occurrences per cluster. + + Args: + show: Whether to display the figure. Defaults to CONFIG.Plotting.default_show. + + Returns: + PlotResult containing the figure and underlying data. + """ + import plotly.express as px + + from ..config import CONFIG + from ..plot_result import PlotResult + + n_clusters = ( + int(self.n_clusters) if isinstance(self.n_clusters, (int, np.integer)) else int(self.n_clusters.values) + ) + + # Create DataFrame for plotting + import pandas as pd + + cluster_order = self.get_cluster_order_for_slice() + df = pd.DataFrame( + { + 'Original Period': range(1, len(cluster_order) + 1), + 'Cluster': cluster_order, + } + ) + + # Bar chart showing cluster assignment + fig = px.bar( + df, + x='Original Period', + y=[1] * len(df), + color='Cluster', + color_continuous_scale='Viridis', + title=f'Cluster Assignment ({self.n_original_periods} periods → {n_clusters} clusters)', + ) + fig.update_layout(yaxis_visible=False, coloraxis_colorbar_title='Cluster') + + # Build data for PlotResult + data = xr.Dataset( + { + 'cluster_order': self.cluster_order, + 'cluster_occurrences': self.cluster_occurrences, + } + ) + plot_result = PlotResult(data=data, figure=fig) + + if show is None: + show = CONFIG.Plotting.default_show + if show: + plot_result.show() + + return plot_result + + +@dataclass +class ClusterResult: + """Universal result from any time series aggregation method. + + This dataclass captures all information needed to: + 1. Transform a FlowSystem to use aggregated (clustered) timesteps + 2. Expand a solution back to original resolution + 3. Properly weight results for statistics + + Attributes: + timestep_mapping: Maps each original timestep to its representative index. + dims: [original_time] for simple case, or + [original_time, period, scenario] for multi-period/scenario systems. + Values are indices into the representative timesteps (0 to n_representatives-1). + n_representatives: Number of representative timesteps after aggregation. + representative_weights: Weight for each representative timestep. 
+ dims: [time] or [time, period, scenario] + Typically equals the number of original timesteps each representative covers. + Used as cluster_weight in the FlowSystem. + aggregated_data: Time series data aggregated to representative timesteps. + Optional - some backends may not aggregate data. + cluster_structure: Hierarchical clustering structure for storage linking. + Optional - only needed when using cluster() mode. + original_data: Reference to original data before aggregation. + Optional - useful for expand_solution(). + + Example: + For 8760 hourly timesteps clustered into 192 representative timesteps (8 clusters x 24h): + - timestep_mapping: shape (8760,), values 0-191 + - n_representatives: 192 + - representative_weights: shape (192,), summing to 8760 + """ + + timestep_mapping: xr.DataArray + n_representatives: int | xr.DataArray + representative_weights: xr.DataArray + aggregated_data: xr.Dataset | None = None + cluster_structure: ClusterStructure | None = None + original_data: xr.Dataset | None = None + + def __post_init__(self): + """Validate and ensure proper DataArray formatting.""" + # Ensure timestep_mapping is a DataArray + if not isinstance(self.timestep_mapping, xr.DataArray): + self.timestep_mapping = xr.DataArray(self.timestep_mapping, dims=['original_time'], name='timestep_mapping') + elif self.timestep_mapping.name is None: + self.timestep_mapping = self.timestep_mapping.rename('timestep_mapping') + + # Ensure representative_weights is a DataArray + if not isinstance(self.representative_weights, xr.DataArray): + self.representative_weights = xr.DataArray( + self.representative_weights, dims=['time'], name='representative_weights' + ) + elif self.representative_weights.name is None: + self.representative_weights = self.representative_weights.rename('representative_weights') + + def __repr__(self) -> str: + n_rep = ( + int(self.n_representatives) + if isinstance(self.n_representatives, (int, np.integer)) + else int(self.n_representatives.values) + ) + has_structure = self.cluster_structure is not None + has_data = self.original_data is not None and self.aggregated_data is not None + return ( + f'ClusterResult(\n' + f' {self.n_original_timesteps} original → {n_rep} representative timesteps\n' + f' weights sum={float(self.representative_weights.sum().values):.0f}\n' + f' cluster_structure={has_structure}, data={has_data}\n' + f')' + ) + + @property + def n_original_timesteps(self) -> int: + """Number of original timesteps (before aggregation).""" + return len(self.timestep_mapping.coords['original_time']) + + def get_expansion_mapping(self) -> xr.DataArray: + """Get mapping from original timesteps to representative indices. + + This is the same as timestep_mapping but ensures proper naming + for use in expand_solution(). + + Returns: + DataArray mapping original timesteps to representative indices. + """ + return self.timestep_mapping.rename('expansion_mapping') + + def get_timestep_mapping_for_slice(self, period: str | None = None, scenario: str | None = None) -> np.ndarray: + """Get timestep_mapping for a specific (period, scenario) combination. + + Args: + period: Period label (None if no period dimension). + scenario: Scenario label (None if no scenario dimension). + + Returns: + 1D numpy array of representative timestep indices for the specified slice. 
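+
+        Example:
+            Illustrative sketch for the 8760 → 192 case in the class docstring
+            (``result`` is an assumed ClusterResult instance):
+
+                mapping = result.get_timestep_mapping_for_slice(scenario='Base Case')
+                # 1D array of length 8760 with values in the range 0..191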
+ """ + mapping = self.timestep_mapping + if 'period' in mapping.dims and period is not None: + mapping = mapping.sel(period=period) + if 'scenario' in mapping.dims and scenario is not None: + mapping = mapping.sel(scenario=scenario) + return mapping.values.astype(int) + + def expand_data(self, aggregated: xr.DataArray, original_time: xr.DataArray | None = None) -> xr.DataArray: + """Expand aggregated data back to original timesteps. + + Uses the stored timestep_mapping to map each original timestep to its + representative value from the aggregated data. Handles multi-dimensional + data with period/scenario dimensions. + + Args: + aggregated: DataArray with aggregated (reduced) time dimension. + original_time: Original time coordinates. If None, uses coords from + original_data if available. + + Returns: + DataArray expanded to original timesteps. + + Example: + >>> result = fs_clustered.clustering.result + >>> aggregated_values = result.aggregated_data['Demand|profile'] + >>> expanded = result.expand_data(aggregated_values) + >>> len(expanded.time) == len(original_timesteps) # True + """ + import pandas as pd + + if original_time is None: + if self.original_data is None: + raise ValueError('original_time required when original_data is not available') + original_time = self.original_data.coords['time'] + + timestep_mapping = self.timestep_mapping + has_periods = 'period' in timestep_mapping.dims + has_scenarios = 'scenario' in timestep_mapping.dims + + # Simple case: no period/scenario dimensions + if not has_periods and not has_scenarios: + mapping = timestep_mapping.values + expanded_values = aggregated.values[mapping] + return xr.DataArray( + expanded_values, + coords={'time': original_time}, + dims=['time'], + attrs=aggregated.attrs, + ) + + # Multi-dimensional: expand each (period, scenario) slice and recombine + periods = list(timestep_mapping.coords['period'].values) if has_periods else [None] + scenarios = list(timestep_mapping.coords['scenario'].values) if has_scenarios else [None] + + expanded_slices: dict[tuple, xr.DataArray] = {} + for p in periods: + for s in scenarios: + # Get mapping for this slice + mapping_slice = timestep_mapping + if p is not None: + mapping_slice = mapping_slice.sel(period=p) + if s is not None: + mapping_slice = mapping_slice.sel(scenario=s) + mapping = mapping_slice.values + + # Select the data slice + selector = {} + if p is not None and 'period' in aggregated.dims: + selector['period'] = p + if s is not None and 'scenario' in aggregated.dims: + selector['scenario'] = s + + slice_da = aggregated.sel(**selector, drop=True) if selector else aggregated + expanded = slice_da.isel(time=xr.DataArray(mapping, dims=['time'])) + expanded_slices[(p, s)] = expanded.assign_coords(time=original_time) + + # Recombine slices using xr.concat + if has_periods and has_scenarios: + period_arrays = [] + for p in periods: + scenario_arrays = [expanded_slices[(p, s)] for s in scenarios] + period_arrays.append(xr.concat(scenario_arrays, dim=pd.Index(scenarios, name='scenario'))) + result = xr.concat(period_arrays, dim=pd.Index(periods, name='period')) + elif has_periods: + result = xr.concat([expanded_slices[(p, None)] for p in periods], dim=pd.Index(periods, name='period')) + else: + result = xr.concat( + [expanded_slices[(None, s)] for s in scenarios], dim=pd.Index(scenarios, name='scenario') + ) + + return result.transpose('time', ...).assign_attrs(aggregated.attrs) + + def validate(self) -> None: + """Validate that all fields are consistent. 
+ + Raises: + ValueError: If validation fails. + """ + n_rep = ( + int(self.n_representatives) + if isinstance(self.n_representatives, (int, np.integer)) + else int(self.n_representatives.max().values) + ) + + # Check mapping values are within range + max_idx = int(self.timestep_mapping.max().values) + if max_idx >= n_rep: + raise ValueError(f'timestep_mapping contains index {max_idx} but n_representatives is {n_rep}') + + # Check weights length matches n_representatives + if len(self.representative_weights) != n_rep: + raise ValueError( + f'representative_weights has {len(self.representative_weights)} elements ' + f'but n_representatives is {n_rep}' + ) + + # Check weights sum roughly equals original timesteps + weight_sum = float(self.representative_weights.sum().values) + n_original = self.n_original_timesteps + if abs(weight_sum - n_original) > 1e-6: + # Warning only - some aggregation methods may not preserve this exactly + import warnings + + warnings.warn( + f'representative_weights sum ({weight_sum}) does not match n_original_timesteps ({n_original})', + stacklevel=2, + ) + + +class ClusteringPlotAccessor: + """Plot accessor for Clustering objects. + + Provides visualization methods for comparing original vs aggregated data + and understanding the clustering structure. + + Example: + >>> fs_clustered = flow_system.transform.cluster(n_clusters=8, cluster_duration='1D') + >>> fs_clustered.clustering.plot.compare() # timeseries comparison + >>> fs_clustered.clustering.plot.compare(kind='duration_curve') # duration curve + >>> fs_clustered.clustering.plot.heatmap() # structure visualization + >>> fs_clustered.clustering.plot.clusters() # cluster profiles + """ + + def __init__(self, clustering: Clustering): + self._clustering = clustering + + def compare( + self, + kind: str = 'timeseries', + variables: str | list[str] | None = None, + *, + select: SelectType | None = None, + colors: ColorType | None = None, + facet_col: str | None = 'period', + facet_row: str | None = 'scenario', + show: bool | None = None, + **plotly_kwargs: Any, + ) -> PlotResult: + """Compare original vs aggregated data. + + Args: + kind: Type of comparison plot. + - 'timeseries': Time series comparison (default) + - 'duration_curve': Sorted duration curve comparison + variables: Variable(s) to plot. Can be a string, list of strings, + or None to plot all time-varying variables. + select: xarray-style selection dict, e.g. {'scenario': 'Base Case'}. + colors: Color specification (colorscale name, color list, or label-to-color dict). + facet_col: Dimension for subplot columns (default: 'period'). + facet_row: Dimension for subplot rows (default: 'scenario'). + show: Whether to display the figure. + Defaults to CONFIG.Plotting.default_show. + **plotly_kwargs: Additional arguments passed to plotly. + + Returns: + PlotResult containing the comparison figure and underlying data. + """ + import pandas as pd + import plotly.express as px + + from ..color_processing import process_colors + from ..config import CONFIG + from ..plot_result import PlotResult + from ..statistics_accessor import _apply_selection + + if kind not in ('timeseries', 'duration_curve'): + raise ValueError(f"Unknown kind '{kind}'. 
Use 'timeseries' or 'duration_curve'.") + + result = self._clustering.result + if result.original_data is None or result.aggregated_data is None: + raise ValueError('No original/aggregated data available for comparison') + + resolved_variables = self._resolve_variables(variables) + + # Build Dataset with 'representation' dimension for Original/Clustered + data_vars = {} + for var in resolved_variables: + original = result.original_data[var] + clustered = result.expand_data(result.aggregated_data[var]) + combined = xr.concat([original, clustered], dim=pd.Index(['Original', 'Clustered'], name='representation')) + data_vars[var] = combined + ds = xr.Dataset(data_vars) + + # Apply selection + ds = _apply_selection(ds, select) + + # For duration curve: flatten and sort values + if kind == 'duration_curve': + sorted_vars = {} + for var in ds.data_vars: + for rep in ds.coords['representation'].values: + values = np.sort(ds[var].sel(representation=rep).values.flatten())[::-1] + sorted_vars[(var, rep)] = values + n = len(values) + ds = xr.Dataset( + { + var: xr.DataArray( + [sorted_vars[(var, r)] for r in ['Original', 'Clustered']], + dims=['representation', 'rank'], + coords={'representation': ['Original', 'Clustered'], 'rank': range(n)}, + ) + for var in resolved_variables + } + ) + + # Resolve facets (only for timeseries) + actual_facet_col = facet_col if kind == 'timeseries' and facet_col in ds.dims else None + actual_facet_row = facet_row if kind == 'timeseries' and facet_row in ds.dims else None + + # Convert to long-form DataFrame + df = ds.to_dataframe().reset_index() + coord_cols = [c for c in ds.coords.keys() if c in df.columns] + df = df.melt(id_vars=coord_cols, var_name='variable', value_name='value') + + variable_labels = df['variable'].unique().tolist() + color_map = process_colors(colors, variable_labels, CONFIG.Plotting.default_qualitative_colorscale) + + # Set x-axis and title based on kind + x_col = 'time' if kind == 'timeseries' else 'rank' + if kind == 'timeseries': + title = ( + 'Original vs Clustered' + if len(resolved_variables) > 1 + else f'Original vs Clustered: {resolved_variables[0]}' + ) + labels = {} + else: + title = 'Duration Curve' if len(resolved_variables) > 1 else f'Duration Curve: {resolved_variables[0]}' + labels = {'rank': 'Hours (sorted)', 'value': 'Value'} + + fig = px.line( + df, + x=x_col, + y='value', + color='variable', + line_dash='representation', + facet_col=actual_facet_col, + facet_row=actual_facet_row, + title=title, + labels=labels, + color_discrete_map=color_map, + **plotly_kwargs, + ) + if actual_facet_row or actual_facet_col: + fig.update_yaxes(matches=None) + fig.for_each_annotation(lambda a: a.update(text=a.text.split('=')[-1])) + + plot_result = PlotResult(data=ds, figure=fig) + + if show is None: + show = CONFIG.Plotting.default_show + if show: + plot_result.show() + + return plot_result + + def _get_time_varying_variables(self) -> list[str]: + """Get list of time-varying variables from original data.""" + result = self._clustering.result + if result.original_data is None: + return [] + return [ + name + for name in result.original_data.data_vars + if 'time' in result.original_data[name].dims + and not np.isclose(result.original_data[name].min(), result.original_data[name].max()) + ] + + def _resolve_variables(self, variables: str | list[str] | None) -> list[str]: + """Resolve variables parameter to a list of valid variable names.""" + time_vars = self._get_time_varying_variables() + if not time_vars: + raise ValueError('No time-varying 
variables found') + + if variables is None: + return time_vars + elif isinstance(variables, str): + if variables not in time_vars: + raise ValueError(f"Variable '{variables}' not found. Available: {time_vars}") + return [variables] + else: + invalid = [v for v in variables if v not in time_vars] + if invalid: + raise ValueError(f'Variables {invalid} not found. Available: {time_vars}') + return list(variables) + + def heatmap( + self, + *, + select: SelectType | None = None, + colors: str | list[str] | None = None, + facet_col: str | None = 'period', + animation_frame: str | None = 'scenario', + show: bool | None = None, + **plotly_kwargs: Any, + ) -> PlotResult: + """Plot cluster assignments over time as a heatmap timeline. + + Shows which cluster each timestep belongs to as a horizontal color bar. + The x-axis is time, color indicates cluster assignment. This visualization + aligns with time series data, making it easy to correlate cluster + assignments with other plots. + + For multi-period/scenario data, uses faceting and/or animation. + + Args: + select: xarray-style selection dict, e.g. {'scenario': 'Base Case'}. + colors: Colorscale name (str) or list of colors for heatmap coloring. + Dicts are not supported for heatmaps. + Defaults to CONFIG.Plotting.default_sequential_colorscale. + facet_col: Dimension to facet on columns (default: 'period'). + animation_frame: Dimension for animation slider (default: 'scenario'). + show: Whether to display the figure. + Defaults to CONFIG.Plotting.default_show. + **plotly_kwargs: Additional arguments passed to plotly. + + Returns: + PlotResult containing the heatmap figure and cluster assignment data. + The data has 'cluster' variable with time dimension, matching original timesteps. + """ + import pandas as pd + import plotly.express as px + + from ..config import CONFIG + from ..plot_result import PlotResult + from ..statistics_accessor import _apply_selection + + result = self._clustering.result + cs = result.cluster_structure + if cs is None: + raise ValueError('No cluster structure available') + + cluster_order_da = cs.cluster_order + timesteps_per_period = cs.timesteps_per_cluster + original_time = result.original_data.coords['time'] if result.original_data is not None else None + + # Apply selection if provided + if select: + cluster_order_da = _apply_selection(cluster_order_da.to_dataset(name='cluster'), select)['cluster'] + + # Check for multi-dimensional data + has_periods = 'period' in cluster_order_da.dims + has_scenarios = 'scenario' in cluster_order_da.dims + + # Get dimension values + periods = list(cluster_order_da.coords['period'].values) if has_periods else [None] + scenarios = list(cluster_order_da.coords['scenario'].values) if has_scenarios else [None] + + # Build cluster assignment per timestep for each (period, scenario) slice + cluster_slices: dict[tuple, xr.DataArray] = {} + for p in periods: + for s in scenarios: + cluster_order = cs.get_cluster_order_for_slice(period=p, scenario=s) + # Expand: each cluster repeated timesteps_per_period times + cluster_per_timestep = np.repeat(cluster_order, timesteps_per_period) + cluster_slices[(p, s)] = xr.DataArray( + cluster_per_timestep, + dims=['time'], + coords={'time': original_time} if original_time is not None else None, + ) + + # Combine slices into multi-dimensional DataArray + if has_periods and has_scenarios: + period_arrays = [] + for p in periods: + scenario_arrays = [cluster_slices[(p, s)] for s in scenarios] + period_arrays.append(xr.concat(scenario_arrays, 
dim=pd.Index(scenarios, name='scenario'))) + cluster_da = xr.concat(period_arrays, dim=pd.Index(periods, name='period')) + elif has_periods: + cluster_da = xr.concat( + [cluster_slices[(p, None)] for p in periods], + dim=pd.Index(periods, name='period'), + ) + elif has_scenarios: + cluster_da = xr.concat( + [cluster_slices[(None, s)] for s in scenarios], + dim=pd.Index(scenarios, name='scenario'), + ) + else: + cluster_da = cluster_slices[(None, None)] + + # Resolve facet_col and animation_frame - only use if dimension exists + actual_facet_col = facet_col if facet_col and facet_col in cluster_da.dims else None + actual_animation = animation_frame if animation_frame and animation_frame in cluster_da.dims else None + + # Add dummy y dimension for heatmap visualization (single row) + heatmap_da = cluster_da.expand_dims('y', axis=-1) + heatmap_da = heatmap_da.assign_coords(y=['Cluster']) + + colorscale = colors or CONFIG.Plotting.default_sequential_colorscale + + # Use px.imshow with xr.DataArray + fig = px.imshow( + heatmap_da, + color_continuous_scale=colorscale, + facet_col=actual_facet_col, + animation_frame=actual_animation, + title='Cluster Assignments', + labels={'time': 'Time', 'color': 'Cluster'}, + aspect='auto', + **plotly_kwargs, + ) + + # Clean up facet labels + if actual_facet_col: + fig.for_each_annotation(lambda a: a.update(text=a.text.split('=')[-1])) + + # Hide y-axis since it's just a single row + fig.update_yaxes(showticklabels=False) + + # Data is exactly what we plotted (without dummy y dimension) + cluster_da.name = 'cluster' + data = xr.Dataset({'cluster': cluster_da}) + plot_result = PlotResult(data=data, figure=fig) + + if show is None: + show = CONFIG.Plotting.default_show + if show: + plot_result.show() + + return plot_result + + def clusters( + self, + variables: str | list[str] | None = None, + *, + select: SelectType | None = None, + colors: ColorType | None = None, + facet_col_wrap: int | None = None, + show: bool | None = None, + **plotly_kwargs: Any, + ) -> PlotResult: + """Plot each cluster's typical period profile. + + Shows each cluster as a separate faceted subplot. Useful for + understanding what each cluster represents. + + Args: + variables: Variable(s) to plot. Can be a string, list of strings, + or None to plot all time-varying variables. + select: xarray-style selection dict, e.g. {'scenario': 'Base Case'}. + colors: Color specification (colorscale name, color list, or label-to-color dict). + facet_col_wrap: Max columns before wrapping facets. + Defaults to CONFIG.Plotting.default_facet_cols. + show: Whether to display the figure. + Defaults to CONFIG.Plotting.default_show. + **plotly_kwargs: Additional arguments passed to plotly. + + Returns: + PlotResult containing the figure and underlying data. 
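+
+        Example:
+            Illustrative usage (object names as in the accessor docstring above):
+
+                fs_clustered.clustering.plot.clusters()
+                fs_clustered.clustering.plot.clusters('Demand|fixed_relative_profile')
+                fs_clustered.clustering.plot.clusters(select={'scenario': 'Base Case'})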
+ """ + import pandas as pd + import plotly.express as px + + from ..color_processing import process_colors + from ..config import CONFIG + from ..plot_result import PlotResult + from ..statistics_accessor import _apply_selection + + result = self._clustering.result + cs = result.cluster_structure + if result.aggregated_data is None or cs is None: + raise ValueError('No aggregated data or cluster structure available') + + # Apply selection to aggregated data + aggregated_data = _apply_selection(result.aggregated_data, select) + + time_vars = self._get_time_varying_variables() + if not time_vars: + raise ValueError('No time-varying variables found') + + # Resolve variables + resolved_variables = self._resolve_variables(variables) + + n_clusters = int(cs.n_clusters) if isinstance(cs.n_clusters, (int, np.integer)) else int(cs.n_clusters.values) + timesteps_per_cluster = cs.timesteps_per_cluster + + # Build long-form DataFrame with cluster labels including occurrence counts + rows = [] + data_vars = {} + for var in resolved_variables: + data = aggregated_data[var].values + data_by_cluster = data.reshape(n_clusters, timesteps_per_cluster) + data_vars[var] = xr.DataArray( + data_by_cluster, + dims=['cluster', 'timestep'], + coords={'cluster': range(n_clusters), 'timestep': range(timesteps_per_cluster)}, + ) + for c in range(n_clusters): + occurrence = int(cs.cluster_occurrences.sel(cluster=c).values) + label = f'Cluster {c} (×{occurrence})' + for t in range(timesteps_per_cluster): + rows.append({'cluster': label, 'timestep': t, 'value': data_by_cluster[c, t], 'variable': var}) + df = pd.DataFrame(rows) + + cluster_labels = df['cluster'].unique().tolist() + color_map = process_colors(colors, cluster_labels, CONFIG.Plotting.default_qualitative_colorscale) + facet_col_wrap = facet_col_wrap or CONFIG.Plotting.default_facet_cols + title = 'Clusters' if len(resolved_variables) > 1 else f'Clusters: {resolved_variables[0]}' + + fig = px.line( + df, + x='timestep', + y='value', + facet_col='cluster', + facet_row='variable' if len(resolved_variables) > 1 else None, + facet_col_wrap=facet_col_wrap if len(resolved_variables) == 1 else None, + title=title, + color_discrete_map=color_map, + **plotly_kwargs, + ) + fig.update_layout(showlegend=False) + if len(resolved_variables) > 1: + fig.update_yaxes(matches=None) + fig.for_each_annotation(lambda a: a.update(text=a.text.split('=')[-1])) + + data_vars['occurrences'] = cs.cluster_occurrences + result_data = xr.Dataset(data_vars) + plot_result = PlotResult(data=result_data, figure=fig) + + if show is None: + show = CONFIG.Plotting.default_show + if show: + plot_result.show() + + return plot_result + + +@dataclass +class Clustering: + """Information about an aggregation stored on a FlowSystem. + + This is stored on the FlowSystem after aggregation to enable: + - expand_solution() to map back to original timesteps + - Statistics to properly weight results + - Inter-cluster storage linking + - Serialization/deserialization of aggregated models + + Attributes: + result: The ClusterResult from the aggregation backend. + original_flow_system: Reference to the FlowSystem before aggregation. + backend_name: Name of the aggregation backend used (e.g., 'tsam', 'manual'). 
+ + Example: + >>> fs_clustered = flow_system.transform.cluster(n_clusters=8, cluster_duration='1D') + >>> fs_clustered.clustering.n_clusters + 8 + >>> fs_clustered.clustering.plot.compare() + >>> fs_clustered.clustering.plot.heatmap() + """ + + result: ClusterResult + original_flow_system: FlowSystem # FlowSystem - avoid circular import + backend_name: str = 'unknown' + + def __repr__(self) -> str: + cs = self.result.cluster_structure + if cs is not None: + n_clusters = ( + int(cs.n_clusters) if isinstance(cs.n_clusters, (int, np.integer)) else int(cs.n_clusters.values) + ) + structure_info = f'{cs.n_original_periods} periods → {n_clusters} clusters' + else: + structure_info = 'no structure' + return f'Clustering(\n backend={self.backend_name!r}\n {structure_info}\n)' + + @property + def plot(self) -> ClusteringPlotAccessor: + """Access plotting methods for clustering visualization. + + Returns: + ClusteringPlotAccessor with compare(), heatmap(), and clusters() methods. + + Example: + >>> fs.clustering.plot.compare() # timeseries comparison + >>> fs.clustering.plot.compare(kind='duration_curve') # duration curve + >>> fs.clustering.plot.heatmap() # structure visualization + >>> fs.clustering.plot.clusters() # cluster profiles + """ + return ClusteringPlotAccessor(self) + + # Convenience properties delegating to nested objects + + @property + def cluster_order(self) -> xr.DataArray: + """Which cluster each original period belongs to.""" + if self.result.cluster_structure is None: + raise ValueError('No cluster_structure available') + return self.result.cluster_structure.cluster_order + + @property + def occurrences(self) -> xr.DataArray: + """How many original periods each cluster represents.""" + if self.result.cluster_structure is None: + raise ValueError('No cluster_structure available') + return self.result.cluster_structure.cluster_occurrences + + @property + def n_clusters(self) -> int: + """Number of clusters.""" + if self.result.cluster_structure is None: + raise ValueError('No cluster_structure available') + n = self.result.cluster_structure.n_clusters + return int(n) if isinstance(n, (int, np.integer)) else int(n.values) + + @property + def n_original_periods(self) -> int: + """Number of original periods (before clustering).""" + if self.result.cluster_structure is None: + raise ValueError('No cluster_structure available') + return self.result.cluster_structure.n_original_periods + + @property + def timesteps_per_period(self) -> int: + """Number of timesteps in each period/cluster.""" + if self.result.cluster_structure is None: + raise ValueError('No cluster_structure available') + return self.result.cluster_structure.timesteps_per_cluster + + @property + def timestep_mapping(self) -> xr.DataArray: + """Mapping from original timesteps to representative timestep indices.""" + return self.result.timestep_mapping + + @property + def cluster_start_positions(self) -> np.ndarray: + """Integer positions where clusters start. + + Returns the indices of the first timestep of each cluster. + Use these positions to build masks for specific use cases. + + Returns: + 1D numpy array of positions: [0, T, 2T, ...] where T = timesteps_per_period. 
+ + Example: + For 2 clusters with 24 timesteps each: + >>> clustering.cluster_start_positions + array([0, 24]) + """ + if self.result.cluster_structure is None: + raise ValueError('No cluster_structure available') + + n_timesteps = self.n_clusters * self.timesteps_per_period + return np.arange(0, n_timesteps, self.timesteps_per_period) + + +def create_cluster_structure_from_mapping( + timestep_mapping: xr.DataArray, + timesteps_per_cluster: int, +) -> ClusterStructure: + """Create ClusterStructure from a timestep mapping. + + This is a convenience function for creating ClusterStructure when you + have the timestep mapping but not the full clustering metadata. + + Args: + timestep_mapping: Mapping from original timesteps to representative indices. + timesteps_per_cluster: Number of timesteps per cluster period. + + Returns: + ClusterStructure derived from the mapping. + """ + n_original = len(timestep_mapping) + n_original_periods = n_original // timesteps_per_cluster + + # Determine cluster order from the mapping + # Each original period maps to the cluster of its first timestep + cluster_order = [] + for p in range(n_original_periods): + start_idx = p * timesteps_per_cluster + cluster_idx = int(timestep_mapping.isel(original_time=start_idx).values) // timesteps_per_cluster + cluster_order.append(cluster_idx) + + cluster_order_da = xr.DataArray(cluster_order, dims=['original_period'], name='cluster_order') + + # Count occurrences of each cluster + unique_clusters = np.unique(cluster_order) + occurrences = {} + for c in unique_clusters: + occurrences[int(c)] = sum(1 for x in cluster_order if x == c) + + n_clusters = len(unique_clusters) + cluster_occurrences_da = xr.DataArray( + [occurrences.get(c, 0) for c in range(n_clusters)], + dims=['cluster'], + name='cluster_occurrences', + ) + + return ClusterStructure( + cluster_order=cluster_order_da, + cluster_occurrences=cluster_occurrences_da, + n_clusters=n_clusters, + timesteps_per_cluster=timesteps_per_cluster, + ) diff --git a/flixopt/clustering/intercluster_helpers.py b/flixopt/clustering/intercluster_helpers.py new file mode 100644 index 000000000..fd0e41cce --- /dev/null +++ b/flixopt/clustering/intercluster_helpers.py @@ -0,0 +1,106 @@ +"""Helper utilities for inter-cluster storage linking. + +This module provides reusable utilities for building inter-cluster storage linking +constraints following the S-N model from Blanke et al. (2022). +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING + +import numpy as np +import xarray as xr + +if TYPE_CHECKING: + from ..flow_system import FlowSystem + from ..interface import InvestParameters + + +@dataclass +class CapacityBounds: + """Extracted capacity bounds for storage SOC_boundary variables.""" + + lower: xr.DataArray + upper: xr.DataArray + has_investment: bool + + +def extract_capacity_bounds( + capacity_param: InvestParameters | int | float, + boundary_coords: dict, + boundary_dims: list[str], +) -> CapacityBounds: + """Extract capacity bounds from storage parameters. + + Handles: + - Fixed numeric values + - InvestParameters with fixed_size or maximum_size + - xr.DataArray with dimensions + + Args: + capacity_param: The capacity parameter (InvestParameters or scalar). + boundary_coords: Coordinates for SOC_boundary variable. + boundary_dims: Dimension names for SOC_boundary variable. + + Returns: + CapacityBounds with lower/upper bounds and investment flag. 
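+
+    Example:
+        Illustrative sketch for a 365-day clustering; ``fs`` and ``storage`` are
+        assumed FlowSystem and Storage objects:
+
+            coords, dims = build_boundary_coords(n_original_periods=365, flow_system=fs)
+            bounds = extract_capacity_bounds(storage.capacity_in_flow_hours, coords, dims)
+            bounds.lower  # zeros, shape (366, ...) - one entry per cluster boundary
+            bounds.upper  # fixed_size/maximum_size (or scalar capacity) broadcast to that shape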
+ """ + n_boundaries = len(boundary_coords['cluster_boundary']) + lb_shape = [n_boundaries] + [len(boundary_coords[d]) for d in boundary_dims[1:]] + + lb = xr.DataArray(np.zeros(lb_shape), coords=boundary_coords, dims=boundary_dims) + + # Determine has_investment and cap_value + has_investment = hasattr(capacity_param, 'maximum_size') + + if hasattr(capacity_param, 'fixed_size') and capacity_param.fixed_size is not None: + cap_value = capacity_param.fixed_size + elif hasattr(capacity_param, 'maximum_size') and capacity_param.maximum_size is not None: + cap_value = capacity_param.maximum_size + elif isinstance(capacity_param, (int, float)): + cap_value = capacity_param + else: + cap_value = 1e9 # Large default for unbounded case + + # Build upper bound + if isinstance(cap_value, xr.DataArray) and cap_value.dims: + ub = cap_value.expand_dims({'cluster_boundary': n_boundaries}, axis=0) + ub = ub.assign_coords(cluster_boundary=np.arange(n_boundaries)) + ub = ub.transpose('cluster_boundary', ...) + else: + if hasattr(cap_value, 'item'): + cap_value = float(cap_value.item()) + else: + cap_value = float(cap_value) + ub = xr.DataArray(np.full(lb_shape, cap_value), coords=boundary_coords, dims=boundary_dims) + + return CapacityBounds(lower=lb, upper=ub, has_investment=has_investment) + + +def build_boundary_coords( + n_original_periods: int, + flow_system: FlowSystem, +) -> tuple[dict, list[str]]: + """Build coordinates and dimensions for SOC_boundary variables. + + Args: + n_original_periods: Number of original (non-aggregated) periods. + flow_system: The FlowSystem containing period/scenario dimensions. + + Returns: + Tuple of (coords dict, dims list) ready for variable creation. + """ + n_boundaries = n_original_periods + 1 + coords = {'cluster_boundary': np.arange(n_boundaries)} + dims = ['cluster_boundary'] + + if flow_system.periods is not None: + dims.append('period') + coords['period'] = np.array(list(flow_system.periods)) + + if flow_system.scenarios is not None: + dims.append('scenario') + coords['scenario'] = np.array(list(flow_system.scenarios)) + + return coords, dims diff --git a/flixopt/components.py b/flixopt/components.py index 267c144af..019b6cc72 100644 --- a/flixopt/components.py +++ b/flixopt/components.py @@ -270,7 +270,7 @@ class Storage(Component): maximum_size (or fixed_size) must be explicitly set for proper model scaling. relative_minimum_charge_state: Minimum charge state (0-1). Default: 0. relative_maximum_charge_state: Maximum charge state (0-1). Default: 1. - initial_charge_state: Charge at start. Numeric or 'equals_final'. Default: 0. + initial_charge_state: Charge at start. Numeric, 'equals_final', or None (free). Default: 0. minimal_final_charge_state: Minimum absolute charge required at end (optional). maximal_final_charge_state: Maximum absolute charge allowed at end (optional). relative_minimum_final_charge_state: Minimum relative charge at end. @@ -282,6 +282,21 @@ class Storage(Component): relative_loss_per_hour: Self-discharge per hour (0-0.1). Default: 0. prevent_simultaneous_charge_and_discharge: Prevent charging and discharging simultaneously. Adds binary variables. Default: True. + cluster_mode: How this storage is treated during clustering optimization. + Only relevant when using ``transform.cluster()``. Options: + + - ``'independent'``: Clusters are fully decoupled. No constraints between + clusters, each cluster has free start/end SOC. Fast but ignores + seasonal storage value. + - ``'cyclic'``: Each cluster is self-contained. 
The SOC at the start of + each cluster equals its end (cluster returns to initial state). + Good for "average day" modeling. + - ``'intercluster'``: Link storage state across the original timeline using + SOC boundary variables (Kotzur et al. approach). Properly values + seasonal storage patterns. Overall SOC can drift. + - ``'intercluster_cyclic'`` (default): Like 'intercluster' but also enforces + that overall SOC returns to initial state (yearly cyclic). + meta_data: Additional information stored in results. Python native types only. Examples: @@ -388,7 +403,7 @@ def __init__( capacity_in_flow_hours: Numeric_PS | InvestParameters | None = None, relative_minimum_charge_state: Numeric_TPS = 0, relative_maximum_charge_state: Numeric_TPS = 1, - initial_charge_state: Numeric_PS | Literal['equals_final'] = 0, + initial_charge_state: Numeric_PS | Literal['equals_final'] | None = 0, minimal_final_charge_state: Numeric_PS | None = None, maximal_final_charge_state: Numeric_PS | None = None, relative_minimum_final_charge_state: Numeric_PS | None = None, @@ -398,6 +413,7 @@ def __init__( relative_loss_per_hour: Numeric_TPS = 0, prevent_simultaneous_charge_and_discharge: bool = True, balanced: bool = False, + cluster_mode: Literal['independent', 'cyclic', 'intercluster', 'intercluster_cyclic'] = 'intercluster_cyclic', meta_data: dict | None = None, ): # TODO: fixed_relative_chargeState implementieren @@ -427,6 +443,7 @@ def __init__( self.relative_loss_per_hour: Numeric_TPS = relative_loss_per_hour self.prevent_simultaneous_charge_and_discharge = prevent_simultaneous_charge_and_discharge self.balanced = balanced + self.cluster_mode = cluster_mode def create_model(self, model: FlowSystemModel) -> StorageModel: self._plausibility_checks() @@ -452,7 +469,7 @@ def transform_data(self) -> None: self.relative_loss_per_hour = self._fit_coords( f'{self.prefix}|relative_loss_per_hour', self.relative_loss_per_hour ) - if not isinstance(self.initial_charge_state, str): + if self.initial_charge_state is not None and not isinstance(self.initial_charge_state, str): self.initial_charge_state = self._fit_coords( f'{self.prefix}|initial_charge_state', self.initial_charge_state, dims=['period', 'scenario'] ) @@ -531,8 +548,8 @@ def _plausibility_checks(self) -> None: min_initial_at_max_capacity = maximum_capacity * self.relative_minimum_charge_state.isel(time=0) max_initial_at_min_capacity = minimum_capacity * self.relative_maximum_charge_state.isel(time=0) - # Only perform numeric comparisons if not using 'equals_final' - if not initial_equals_final: + # Only perform numeric comparisons if using a numeric initial_charge_state + if not initial_equals_final and self.initial_charge_state is not None: if (self.initial_charge_state > max_initial_at_min_capacity).any(): raise PlausibilityError( f'{self.label_full}: {self.initial_charge_state=} ' @@ -901,18 +918,51 @@ def _do_modeling(self): charge_state = self.charge_state rel_loss = self.element.relative_loss_per_hour - hours_per_step = self._model.hours_per_step + timestep_duration = self._model.timestep_duration charge_rate = self.element.charging.submodel.flow_rate discharge_rate = self.element.discharging.submodel.flow_rate eff_charge = self.element.eta_charge eff_discharge = self.element.eta_discharge - self.add_constraints( + # Build balance expression + lhs = ( charge_state.isel(time=slice(1, None)) - == charge_state.isel(time=slice(None, -1)) * ((1 - rel_loss) ** hours_per_step) - + charge_rate * eff_charge * hours_per_step - - discharge_rate * hours_per_step / 
eff_discharge, - short_name='charge_state', + - charge_state.isel(time=slice(None, -1)) * ((1 - rel_loss) ** timestep_duration) + - charge_rate * eff_charge * timestep_duration + + discharge_rate * timestep_duration / eff_discharge + ) + + # Handle clustering modes for storage + clustering = self._model.flow_system.clustering + mask = None + + if clustering is not None: + # All modes skip inter-cluster boundaries: removes naive link between end of cluster N and start of N+1 + mask = np.ones(lhs.sizes['time'], dtype=bool) + mask[clustering.cluster_start_positions[1:] - 1] = False + mask = xr.DataArray(mask, coords={'time': lhs.coords['time']}) + + self.add_constraints(lhs == 0, short_name='charge_state', mask=mask) + + # For 'cyclic' mode: each cluster's start equals its end + if clustering is not None and self.element.cluster_mode == 'cyclic': + starts = clustering.cluster_start_positions + for i, start_pos in enumerate(starts): + # End of cluster i is at (start of cluster i+1) - 1, or last timestep for final cluster + if i < len(starts) - 1: + end_pos = starts[i + 1] # In timesteps_extra, this is the end of cluster i + else: + end_pos = len(self._model.flow_system.timesteps) # Last position in timesteps_extra + self.add_constraints( + charge_state.isel(time=start_pos) == charge_state.isel(time=end_pos), + short_name=f'cluster_cyclic_{i}', + ) + + # Determine intercluster mode early (needed for investment bounding) + clustering = self._model.flow_system.clustering + is_intercluster = clustering is not None and self.element.cluster_mode in ( + 'intercluster', + 'intercluster_cyclic', ) # Create InvestmentModel and bounding constraints for investment @@ -927,15 +977,34 @@ def _do_modeling(self): short_name='investment', ) - BoundingPatterns.scaled_bounds( - self, - variable=self.charge_state, - scaling_variable=self.investment.size, - relative_bounds=self._relative_charge_state_bounds, - ) + # For intercluster modes, charge_state represents delta from SOC_boundary (can be negative). + # The bound should be [-size, +size] (can discharge or charge by full capacity). + # For non-intercluster modes, charge_state is absolute SOC and needs [0, size] bounds. + if is_intercluster: + # Symmetric bounds: -size <= charge_state <= size + self.add_constraints( + self.charge_state >= -self.investment.size, + short_name='charge_state|lb', + ) + self.add_constraints( + self.charge_state <= self.investment.size, + short_name='charge_state|ub', + ) + else: + BoundingPatterns.scaled_bounds( + self, + variable=self.charge_state, + scaling_variable=self.investment.size, + relative_bounds=self._relative_charge_state_bounds, + ) + + # Initial charge state (only for non-intercluster modes) + if not is_intercluster: + self._initial_and_final_charge_state() - # Initial charge state - self._initial_and_final_charge_state() + # Add inter-cluster linking for intercluster modes + if is_intercluster: + self._add_intercluster_linking() # Balanced sizes if self.element.balanced: @@ -969,22 +1038,220 @@ def _initial_and_final_charge_state(self): short_name='final_charge_min', ) + def _add_intercluster_linking(self) -> None: + """Add inter-cluster storage linking for aggregated optimization. + + Following the S-N model from Blanke et al. (2022), this method: + 1. Constrains charge_state at each cluster start to 0 (ΔE_0 = 0) + 2. Creates SOC_boundary variables to track absolute SOC across original periods + 3. Links via: SOC_boundary[d+1] = SOC_boundary[d] + delta_SOC[cluster_order[d]] + 4. 
Adds bounds: 0 ≤ SOC_boundary[d] + charge_state[t] ≤ capacity + 5. Optionally enforces cyclic: SOC_boundary[0] = SOC_boundary[end] + """ + from .clustering.intercluster_helpers import ( + build_boundary_coords, + extract_capacity_bounds, + ) + + clustering = self._model.flow_system.clustering + if clustering is None or clustering.result.cluster_structure is None: + return + + cluster_structure = clustering.result.cluster_structure + n_clusters = ( + int(cluster_structure.n_clusters) + if isinstance(cluster_structure.n_clusters, (int, np.integer)) + else int(cluster_structure.n_clusters.values) + ) + timesteps_per_cluster = cluster_structure.timesteps_per_cluster + n_original_periods = cluster_structure.n_original_periods + cluster_order = cluster_structure.cluster_order + + # 1. Add cluster start constraints (ΔE_0 = 0) - vectorized + self._add_cluster_start_constraints(n_clusters, timesteps_per_cluster) + + # 2. Create SOC_boundary variable + flow_system = self._model.flow_system + boundary_coords, boundary_dims = build_boundary_coords(n_original_periods, flow_system) + capacity_bounds = extract_capacity_bounds(self.element.capacity_in_flow_hours, boundary_coords, boundary_dims) + + soc_boundary = self.add_variables( + lower=capacity_bounds.lower, + upper=capacity_bounds.upper, + coords=boundary_coords, + dims=boundary_dims, + short_name='SOC_boundary', + ) + + # 3. Add SOC_boundary <= investment.size for investment-based storage + if capacity_bounds.has_investment and self.investment is not None: + self.add_constraints( + soc_boundary <= self.investment.size, + short_name='SOC_boundary_ub', + ) + + # 4. Compute delta_SOC as DataArray with 'cluster' dimension + delta_soc = self._compute_delta_soc(n_clusters, timesteps_per_cluster) + + # 5. Add linking constraints - vectorized + self._add_linking_constraints(soc_boundary, delta_soc, cluster_order, n_original_periods) + + # 6. Add cyclic constraint if requested + if self.element.cluster_mode == 'intercluster_cyclic': + self.add_constraints( + soc_boundary.isel(cluster_boundary=0) == soc_boundary.isel(cluster_boundary=n_original_periods), + short_name='cyclic', + ) + + # 7. Add combined bound constraints - vectorized + self._add_combined_bound_constraints( + soc_boundary, + cluster_order, + capacity_bounds.has_investment, + n_original_periods, + timesteps_per_cluster, + ) + + def _add_cluster_start_constraints(self, n_clusters: int, timesteps_per_cluster: int) -> None: + """Constrain charge_state at each cluster start to 0 (ΔE_0 = 0).""" + cluster_starts = np.arange(0, n_clusters * timesteps_per_cluster, timesteps_per_cluster) + self.add_constraints( + self.charge_state.isel(time=cluster_starts) == 0, + short_name='cluster_start', + ) + + def _compute_delta_soc(self, n_clusters: int, timesteps_per_cluster: int) -> xr.DataArray: + """Compute delta_SOC for each representative cluster as a DataArray. + + Returns DataArray with 'cluster' dimension containing the net charge state + change (end - start) for each cluster. 
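+
+        Example:
+            Illustrative: with 8 clusters of 24 timesteps each,
+            delta_SOC[c] = charge_state[24*c + 23] - charge_state[24*c] for c = 0..7.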
+ """ + starts = np.arange(0, n_clusters * timesteps_per_cluster, timesteps_per_cluster) + # Last timestep of each cluster (not first of next cluster) + ends = starts + timesteps_per_cluster - 1 + # Compute delta for all clusters at once + delta_soc = self.charge_state.isel(time=ends) - self.charge_state.isel(time=starts) + # Replace 'time' dim with 'cluster' dim + return delta_soc.assign_coords(time=np.arange(n_clusters)).rename({'time': 'cluster'}) + + def _add_linking_constraints( + self, + soc_boundary: xr.DataArray, + delta_soc: xr.DataArray, + cluster_order: xr.DataArray, + n_original_periods: int, + ) -> None: + """Add linking constraints: SOC_boundary[d+1] = SOC_boundary[d] + delta_SOC[cluster_order[d]]. + + Uses vectorized xarray operations instead of loops. + """ + # SOC at boundary d+1 (after original period d completes) + soc_after = soc_boundary.isel(cluster_boundary=slice(1, None)) + # SOC at boundary d (before original period d starts) + soc_before = soc_boundary.isel(cluster_boundary=slice(None, -1)) + + # Rename cluster_boundary -> original_period for alignment + soc_after = soc_after.rename({'cluster_boundary': 'original_period'}) + soc_after = soc_after.assign_coords(original_period=np.arange(n_original_periods)) + soc_before = soc_before.rename({'cluster_boundary': 'original_period'}) + soc_before = soc_before.assign_coords(original_period=np.arange(n_original_periods)) + + # Get delta_soc for each original period using cluster_order as advanced index + # cluster_order has dim 'original_period', delta_soc has dim 'cluster' + delta_soc_ordered = delta_soc.isel(cluster=cluster_order) + + # Single vectorized constraint for all original periods + lhs = soc_after - soc_before - delta_soc_ordered + self.add_constraints(lhs == 0, short_name='link') + + def _add_combined_bound_constraints( + self, + soc_boundary: xr.DataArray, + cluster_order: xr.DataArray, + has_investment: bool, + n_original_periods: int, + timesteps_per_cluster: int, + ) -> None: + """Add combined bound constraints: 0 <= SOC_boundary[d] + charge_state[t] <= capacity. + + Vectorizes over original_period dimension, loops over sample points. 
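+
+        Example:
+            Illustrative: for daily clusters (24 timesteps) each original period d is
+            sampled at offsets 0, 12 and 24 of its representative cluster, and
+            0 <= SOC_boundary[d] + charge_state[t] (<= size) is enforced at those samples.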
+ """ + charge_state = self.charge_state + + # Get soc_boundary for each original period (boundary d, before period d starts) + soc_d = soc_boundary.isel(cluster_boundary=slice(None, -1)) # excludes final boundary + soc_d = soc_d.rename({'cluster_boundary': 'original_period'}) + soc_d = soc_d.assign_coords(original_period=np.arange(n_original_periods)) + + # Sample offsets within each cluster (start, middle, end) + sample_offsets = [0, timesteps_per_cluster // 2, timesteps_per_cluster] + max_time_idx = len(charge_state.coords['time']) - 1 + + # Convert cluster_order to numpy array for indexing + cluster_order_vals = cluster_order.values.astype(int) + cluster_starts = cluster_order_vals * timesteps_per_cluster + + for sample_name, offset in zip(['start', 'mid', 'end'], sample_offsets, strict=False): + time_indices = np.clip(cluster_starts + offset, 0, max_time_idx) + + # Get charge_state at these time indices using numpy array indexer + cs_t = charge_state.isel(time=time_indices) + + # Rename 'time' dim to 'original_period' to align with soc_d + cs_t = cs_t.rename({'time': 'original_period'}) + cs_t = cs_t.assign_coords(original_period=np.arange(n_original_periods)) + + # Combined SOC = soc_boundary[d] + charge_state[t] + combined = soc_d + cs_t + + # Lower bound constraint: combined >= 0 + self.add_constraints(combined >= 0, short_name=f'soc_lb_{sample_name}') + + # Upper bound constraint: combined <= capacity (for investment case) + if has_investment and self.investment is not None: + self.add_constraints(combined <= self.investment.size, short_name=f'soc_ub_{sample_name}') + @property def _absolute_charge_state_bounds(self) -> tuple[xr.DataArray, xr.DataArray]: relative_lower_bound, relative_upper_bound = self._relative_charge_state_bounds + + # For inter-cluster modes, charge_state represents relative change from cluster start (ΔE) + # which can be negative (discharge) or positive (charge). The actual SOC is SOC_boundary + ΔE. + # We set lower bound to -capacity to allow the full range. + clustering = self._model.flow_system.clustering + is_intercluster = clustering is not None and self.element.cluster_mode in ( + 'intercluster', + 'intercluster_cyclic', + ) + if self.element.capacity_in_flow_hours is None: - # Unbounded storage: lower bound is 0, upper bound is infinite - return (0, np.inf) + # Unbounded storage: lower bound is 0 (or -inf for intercluster), upper bound is infinite + return (-np.inf if is_intercluster else 0, np.inf) elif isinstance(self.element.capacity_in_flow_hours, InvestParameters): - return ( - relative_lower_bound * self.element.capacity_in_flow_hours.minimum_or_fixed_size, - relative_upper_bound * self.element.capacity_in_flow_hours.maximum_or_fixed_size, - ) + cap_min = self.element.capacity_in_flow_hours.minimum_or_fixed_size + cap_max = self.element.capacity_in_flow_hours.maximum_or_fixed_size + if is_intercluster: + # For inter-cluster, charge_state is relative to cluster start (ΔE in S-N model) + # ΔE can be negative (discharge) or positive (charge), so allow full range. + # Create bounds with proper time dimension using the shape from relative bounds. 
+ ones = xr.ones_like(relative_upper_bound) + return (-ones * cap_max, ones * cap_max) + else: + return ( + relative_lower_bound * cap_min, + relative_upper_bound * cap_max, + ) else: - return ( - relative_lower_bound * self.element.capacity_in_flow_hours, - relative_upper_bound * self.element.capacity_in_flow_hours, - ) + cap = self.element.capacity_in_flow_hours + if is_intercluster: + # Same as above: create bounds with time dimension + ones = xr.ones_like(relative_upper_bound) + return (-ones * cap, ones * cap) + else: + return ( + relative_lower_bound * cap, + relative_upper_bound * cap, + ) @property def _relative_charge_state_bounds(self) -> tuple[xr.DataArray, xr.DataArray]: diff --git a/flixopt/elements.py b/flixopt/elements.py index 2933eb95a..608b6ac70 100644 --- a/flixopt/elements.py +++ b/flixopt/elements.py @@ -680,7 +680,7 @@ def _do_modeling(self): ModelingPrimitives.expression_tracking_variable( model=self, name=f'{self.label_full}|total_flow_hours', - tracked_expression=(self.flow_rate * self._model.hours_per_step).sum('time'), + tracked_expression=(self.flow_rate * self._model.aggregation_weight).sum('time'), bounds=( self.element.flow_hours_min if self.element.flow_hours_min is not None else 0, self.element.flow_hours_max if self.element.flow_hours_max is not None else None, @@ -821,12 +821,12 @@ def results_structure(self): } def _create_shares(self): - # Effects per flow hour + # Effects per flow hour (use timestep_duration only, cluster_weight is applied when summing to total) if self.element.effects_per_flow_hour: self._model.effects.add_share_to_effects( name=self.label_full, expressions={ - effect: self.flow_rate * self._model.hours_per_step * factor + effect: self.flow_rate * self._model.timestep_duration * factor for effect, factor in self.element.effects_per_flow_hour.items() }, target='temporal', @@ -839,7 +839,7 @@ def _create_bounds_for_load_factor(self): # Maximum load factor constraint if self.element.load_factor_max is not None: - flow_hours_per_size_max = self._model.hours_per_step.sum('time') * self.element.load_factor_max + flow_hours_per_size_max = self._model.aggregation_weight.sum('time') * self.element.load_factor_max self.add_constraints( self.total_flow_hours <= size * flow_hours_per_size_max, short_name='load_factor_max', @@ -847,7 +847,7 @@ def _create_bounds_for_load_factor(self): # Minimum load factor constraint if self.element.load_factor_min is not None: - flow_hours_per_size_min = self._model.hours_per_step.sum('time') * self.element.load_factor_min + flow_hours_per_size_min = self._model.aggregation_weight.sum('time') * self.element.load_factor_min self.add_constraints( self.total_flow_hours >= size * flow_hours_per_size_min, short_name='load_factor_min', @@ -951,7 +951,9 @@ def _do_modeling(self): # Add virtual supply/demand to balance and penalty if needed if self.element.allows_imbalance: - imbalance_penalty = np.multiply(self._model.hours_per_step, self.element.imbalance_penalty_per_flow_hour) + imbalance_penalty = np.multiply( + self._model.aggregation_weight, self.element.imbalance_penalty_per_flow_hour + ) self.virtual_supply = self.add_variables( lower=0, coords=self._model.get_coords(), short_name='virtual_supply' diff --git a/flixopt/features.py b/flixopt/features.py index 4dfe48964..75cb3d92c 100644 --- a/flixopt/features.py +++ b/flixopt/features.py @@ -199,12 +199,12 @@ def _do_modeling(self): # 3. 
Total duration tracking using existing pattern ModelingPrimitives.expression_tracking_variable( self, - tracked_expression=(self.status * self._model.hours_per_step).sum('time'), + tracked_expression=(self.status * self._model.aggregation_weight).sum('time'), bounds=( self.parameters.active_hours_min if self.parameters.active_hours_min is not None else 0, self.parameters.active_hours_max if self.parameters.active_hours_max is not None - else self._model.hours_per_step.sum('time').max().item(), + else self._model.aggregation_weight.sum('time').max().item(), ), short_name='active_hours', coords=['period', 'scenario'], @@ -242,7 +242,7 @@ def _do_modeling(self): short_name='uptime', minimum_duration=self.parameters.min_uptime, maximum_duration=self.parameters.max_uptime, - duration_per_step=self.hours_per_step, + duration_per_step=self.timestep_duration, duration_dim='time', previous_duration=self._get_previous_uptime(), ) @@ -255,7 +255,7 @@ def _do_modeling(self): short_name='downtime', minimum_duration=self.parameters.min_downtime, maximum_duration=self.parameters.max_downtime, - duration_per_step=self.hours_per_step, + duration_per_step=self.timestep_duration, duration_dim='time', previous_duration=self._get_previous_downtime(), ) @@ -263,12 +263,12 @@ def _do_modeling(self): self._add_effects() def _add_effects(self): - """Add operational effects""" + """Add operational effects (use timestep_duration only, cluster_weight is applied when summing to total)""" if self.parameters.effects_per_active_hour: self._model.effects.add_share_to_effects( name=self.label_of_element, expressions={ - effect: self.status * factor * self._model.hours_per_step + effect: self.status * factor * self._model.timestep_duration for effect, factor in self.parameters.effects_per_active_hour.items() }, target='temporal', @@ -330,7 +330,7 @@ def _get_previous_uptime(self): Returns 0 if no previous status is provided (assumes previously inactive). """ - hours_per_step = self._model.hours_per_step.isel(time=0).min().item() + hours_per_step = self._model.timestep_duration.isel(time=0).min().item() if self._previous_status is None: return 0 else: @@ -341,7 +341,7 @@ def _get_previous_downtime(self): Returns one timestep duration if no previous status is provided (assumes previously inactive). 
""" - hours_per_step = self._model.hours_per_step.isel(time=0).min().item() + hours_per_step = self._model.timestep_duration.isel(time=0).min().item() if self._previous_status is None: return hours_per_step else: @@ -612,16 +612,16 @@ def _do_modeling(self): if 'time' in self._dims: self.total_per_timestep = self.add_variables( - lower=-np.inf if (self._min_per_hour is None) else self._min_per_hour * self._model.hours_per_step, - upper=np.inf if (self._max_per_hour is None) else self._max_per_hour * self._model.hours_per_step, + lower=-np.inf if (self._min_per_hour is None) else self._min_per_hour * self._model.timestep_duration, + upper=np.inf if (self._max_per_hour is None) else self._max_per_hour * self._model.timestep_duration, coords=self._model.get_coords(self._dims), short_name='per_timestep', ) self._eq_total_per_timestep = self.add_constraints(self.total_per_timestep == 0, short_name='per_timestep') - # Add it to the total - self._eq_total.lhs -= self.total_per_timestep.sum(dim='time') + # Add it to the total (cluster_weight handles cluster representation, defaults to 1.0) + self._eq_total.lhs -= (self.total_per_timestep * self._model.cluster_weight).sum(dim='time') def add_share( self, diff --git a/flixopt/flow_system.py b/flixopt/flow_system.py index 13821b35b..40ab95b48 100644 --- a/flixopt/flow_system.py +++ b/flixopt/flow_system.py @@ -38,7 +38,9 @@ import pyvis + from .clustering import Clustering from .solvers import _Solver + from .structure import TimeSeriesWeights from .types import Effect_TPS, Numeric_S, Numeric_TPS, NumericOrBool from .carrier import Carrier, CarrierContainer @@ -65,8 +67,12 @@ class FlowSystem(Interface, CompositeContainerMixin[Element]): weight_of_last_period: Weight/duration of the last period. If None, computed from the last period interval. Used for calculating sums over periods in multi-period models. scenario_weights: The weights of each scenario. If None, all scenarios have the same weight (normalized to 1). - Period weights are always computed internally from the period index (like hours_per_timestep for time). + Period weights are always computed internally from the period index (like timestep_duration for time). The final `weights` array (accessible via `flow_system.model.objective_weights`) is computed as period_weights × normalized_scenario_weights, with normalization applied to the scenario weights by default. + cluster_weight: Weight for each timestep representing cluster representation count. + If None (default), all timesteps have weight 1.0. Used by cluster() to specify + how many original timesteps each cluster represents. Combined with timestep_duration + via aggregation_weight for proper time aggregation in clustered models. scenario_independent_sizes: Controls whether investment sizes are equalized across scenarios. 
- True: All sizes are shared/equalized across scenarios - False: All sizes are optimized separately per scenario @@ -170,6 +176,7 @@ def __init__( hours_of_previous_timesteps: int | float | np.ndarray | None = None, weight_of_last_period: int | float | None = None, scenario_weights: Numeric_S | None = None, + cluster_weight: Numeric_TPS | None = None, scenario_independent_sizes: bool | list[str] = True, scenario_independent_flow_rates: bool | list[str] = False, name: str | None = None, @@ -181,13 +188,22 @@ def __init__( self.timesteps_extra, self.hours_of_last_timestep, self.hours_of_previous_timesteps, - hours_per_timestep, + timestep_duration, ) = self._compute_time_metadata(self.timesteps, hours_of_last_timestep, hours_of_previous_timesteps) self.periods = None if periods is None else self._validate_periods(periods) self.scenarios = None if scenarios is None else self._validate_scenarios(scenarios) - self.hours_per_timestep = self.fit_to_model_coords('hours_per_timestep', hours_per_timestep) + self.timestep_duration = self.fit_to_model_coords('timestep_duration', timestep_duration) + + # Cluster weight for cluster() optimization (default 1.0) + # Represents how many original timesteps each cluster represents + # May have period/scenario dimensions if cluster() was used with those + self.cluster_weight = self.fit_to_model_coords( + 'cluster_weight', + np.ones(len(self.timesteps)) if cluster_weight is None else cluster_weight, + dims=['time', 'period', 'scenario'], # Gracefully ignores dims not present + ) self.scenario_weights = scenario_weights # Use setter @@ -216,8 +232,8 @@ def __init__( # Solution dataset - populated after optimization or loaded from file self._solution: xr.Dataset | None = None - # Clustering info - populated by transform.cluster() - self._clustering_info: dict | None = None + # Aggregation info - populated by transform.cluster() + self.clustering: Clustering | None = None # Statistics accessor cache - lazily initialized, invalidated on new solution self._statistics: StatisticsAccessor | None = None @@ -302,11 +318,11 @@ def _create_timesteps_with_extra( return pd.DatetimeIndex(timesteps.append(last_date), name='time') @staticmethod - def calculate_hours_per_timestep(timesteps_extra: pd.DatetimeIndex) -> xr.DataArray: - """Calculate duration of each timestep as a 1D DataArray.""" + def calculate_timestep_duration(timesteps_extra: pd.DatetimeIndex) -> xr.DataArray: + """Calculate duration of each timestep in hours as a 1D DataArray.""" hours_per_step = np.diff(timesteps_extra) / pd.Timedelta(hours=1) return xr.DataArray( - hours_per_step, coords={'time': timesteps_extra[:-1]}, dims='time', name='hours_per_timestep' + hours_per_step, coords={'time': timesteps_extra[:-1]}, dims='time', name='timestep_duration' ) @staticmethod @@ -377,22 +393,22 @@ def _compute_time_metadata( Can be a scalar or array. 
Returns: - Tuple of (timesteps_extra, hours_of_last_timestep, hours_of_previous_timesteps, hours_per_timestep) + Tuple of (timesteps_extra, hours_of_last_timestep, hours_of_previous_timesteps, timestep_duration) """ # Create timesteps with extra step at the end timesteps_extra = cls._create_timesteps_with_extra(timesteps, hours_of_last_timestep) - # Calculate hours per timestep - hours_per_timestep = cls.calculate_hours_per_timestep(timesteps_extra) + # Calculate timestep duration + timestep_duration = cls.calculate_timestep_duration(timesteps_extra) # Extract hours_of_last_timestep if not provided if hours_of_last_timestep is None: - hours_of_last_timestep = hours_per_timestep.isel(time=-1).item() + hours_of_last_timestep = timestep_duration.isel(time=-1).item() # Compute hours_of_previous_timesteps (handles both None and provided cases) hours_of_previous_timesteps = cls._calculate_hours_of_previous_timesteps(timesteps, hours_of_previous_timesteps) - return timesteps_extra, hours_of_last_timestep, hours_of_previous_timesteps, hours_per_timestep + return timesteps_extra, hours_of_last_timestep, hours_of_previous_timesteps, timestep_duration @classmethod def _compute_period_metadata( @@ -437,7 +453,7 @@ def _update_time_metadata( """ Update time-related attributes and data variables in dataset based on its time index. - Recomputes hours_of_last_timestep, hours_of_previous_timesteps, and hours_per_timestep + Recomputes hours_of_last_timestep, hours_of_previous_timesteps, and timestep_duration from the dataset's time index when these parameters are None. This ensures time metadata stays synchronized with the actual timesteps after operations like resampling or selection. @@ -453,14 +469,14 @@ def _update_time_metadata( new_time_index = dataset.indexes.get('time') if new_time_index is not None and len(new_time_index) >= 2: # Use shared helper to compute all time metadata - _, hours_of_last_timestep, hours_of_previous_timesteps, hours_per_timestep = cls._compute_time_metadata( + _, hours_of_last_timestep, hours_of_previous_timesteps, timestep_duration = cls._compute_time_metadata( new_time_index, hours_of_last_timestep, hours_of_previous_timesteps ) - # Update hours_per_timestep DataArray if it exists in the dataset + # Update timestep_duration DataArray if it exists in the dataset # This prevents stale data after resampling operations - if 'hours_per_timestep' in dataset.data_vars: - dataset['hours_per_timestep'] = hours_per_timestep + if 'timestep_duration' in dataset.data_vars: + dataset['timestep_duration'] = timestep_duration # Update time-related attributes only when new values are provided/computed # This preserves existing metadata instead of overwriting with None @@ -677,6 +693,9 @@ def from_dataset(cls, ds: xr.Dataset) -> FlowSystem: scenario_weights=cls._resolve_dataarray_reference(reference_structure['scenario_weights'], arrays_dict) if 'scenario_weights' in reference_structure else None, + cluster_weight=cls._resolve_dataarray_reference(reference_structure['cluster_weight'], arrays_dict) + if 'cluster_weight' in reference_structure + else None, scenario_independent_sizes=reference_structure.get('scenario_independent_sizes', True), scenario_independent_flow_rates=reference_structure.get('scenario_independent_flow_rates', False), name=reference_structure.get('name'), @@ -1253,6 +1272,7 @@ def build_model(self, normalize_weights: bool = True) -> FlowSystem: 1. Connecting and transforming all elements (if not already done) 2. 
Creating the FlowSystemModel with all variables and constraints 3. Adding clustering constraints (if this is a clustered FlowSystem) + 4. Adding typical periods modeling (if this is a reduced FlowSystem) After calling this method, `self.model` will be available for inspection before solving. @@ -1270,33 +1290,11 @@ def build_model(self, normalize_weights: bool = True) -> FlowSystem: """ self.connect_and_transform() self.create_model(normalize_weights) - self.model.do_modeling() - # Add clustering constraints if this is a clustered FlowSystem - if self._clustering_info is not None: - self._add_clustering_constraints() + self.model.do_modeling() return self - def _add_clustering_constraints(self) -> None: - """Add clustering constraints to the model.""" - from .clustering import ClusteringModel - - info = self._clustering_info or {} - required_keys = {'parameters', 'clustering', 'components_to_clusterize'} - missing_keys = required_keys - set(info) - if missing_keys: - raise KeyError(f'_clustering_info missing required keys: {sorted(missing_keys)}') - - clustering_model = ClusteringModel( - model=self.model, - clustering_parameters=info['parameters'], - flow_system=self, - clustering_data=info['clustering'], - components_to_clusterize=info['components_to_clusterize'], - ) - clustering_model.do_modeling() - def solve(self, solver: _Solver) -> FlowSystem: """ Solve the optimization model and populate the solution. @@ -1886,6 +1884,47 @@ def scenario_weights(self, value: Numeric_S | None) -> None: self._scenario_weights = self.fit_to_model_coords('scenario_weights', value, dims=['scenario']) + @property + def weights(self) -> TimeSeriesWeights: + """Unified weighting system for time series aggregation. + + Returns a TimeSeriesWeights object providing a clean, unified interface + for all weight types used in flixopt. This is the recommended way to + access weights for new code (PyPSA-inspired design). + + The temporal weight combines timestep_duration and cluster_weight, + which is the proper weight for summing over time. + + Returns: + TimeSeriesWeights with temporal, period, and scenario weights. + + Example: + >>> weights = flow_system.weights + >>> weighted_total = (flow_rate * weights.temporal).sum('time') + >>> # Or use the convenience method: + >>> weighted_total = weights.sum_over_time(flow_rate) + """ + from .structure import TimeSeriesWeights + + return TimeSeriesWeights( + temporal=self.timestep_duration * self.cluster_weight, + period=self.period_weights, + scenario=self._scenario_weights, + ) + + @property + def aggregation_weight(self) -> xr.DataArray: + """Combined weight for time aggregation. + + Combines timestep_duration (physical duration) and cluster_weight (cluster representation). + Use this for proper time aggregation in clustered models. + + Note: + This is equivalent to `weights.temporal`. The unified TimeSeriesWeights + interface (via `flow_system.weights`) is recommended for new code. + """ + return self.timestep_duration * self.cluster_weight + def _validate_scenario_parameter(self, value: bool | list[str], param_name: str, element_type: str) -> None: """ Validate scenario parameter value. diff --git a/flixopt/optimization.py b/flixopt/optimization.py index b76ceaf03..6a1a87ce1 100644 --- a/flixopt/optimization.py +++ b/flixopt/optimization.py @@ -1,11 +1,12 @@ """ This module contains the Optimization functionality for the flixopt framework. It is used to optimize a FlowSystemModel for a given FlowSystem through a solver. 
-There are three different Optimization types: + +There are two Optimization types: 1. Optimization: Optimizes the FlowSystemModel for the full FlowSystem - 2. ClusteredOptimization: Optimizes the FlowSystemModel for the full FlowSystem, but clusters the TimeSeriesData. - This simplifies the mathematical model and usually speeds up the solving process. - 3. SegmentedOptimization: Solves a FlowSystemModel for each individual Segment of the FlowSystem. + 2. SegmentedOptimization: Solves a FlowSystemModel for each individual Segment of the FlowSystem. + +For time series aggregation (clustering), use FlowSystem.transform.cluster() instead. """ from __future__ import annotations @@ -16,27 +17,22 @@ import sys import timeit import warnings -from collections import Counter from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable -import numpy as np from tqdm import tqdm from . import io as fx_io -from .clustering import Clustering, ClusteringModel, ClusteringParameters from .components import Storage from .config import CONFIG, DEPRECATION_REMOVAL_VERSION, SUCCESS_LEVEL -from .core import DataConverter, TimeSeriesData, drop_constant_arrays from .effects import PENALTY_EFFECT_LABEL from .features import InvestmentModel -from .flow_system import FlowSystem from .results import Results, SegmentedResults if TYPE_CHECKING: import pandas as pd import xarray as xr - from .elements import Component + from .flow_system import FlowSystem from .solvers import _Solver from .structure import FlowSystemModel @@ -357,162 +353,6 @@ def modeled(self) -> bool: return True if self.model is not None else False -class ClusteredOptimization(Optimization): - """ - ClusteredOptimization reduces computational complexity by clustering time series into typical periods. - - This optimization approach clusters time series data using techniques from the tsam library to identify - representative time periods, significantly reducing computation time while maintaining solution accuracy. - - Note: - The quality of the solution depends on the choice of aggregation parameters. - The optimal parameters depend on the specific problem and the characteristics of the time series data. - For more information, refer to the [tsam documentation](https://tsam.readthedocs.io/en/latest/). - - Args: - name: Name of the optimization - flow_system: FlowSystem to be optimized - clustering_parameters: Parameters for clustering. See ClusteringParameters class documentation - components_to_clusterize: list of Components to perform aggregation on. If None, all components are aggregated. - This equalizes variables in the components according to the typical periods computed in the aggregation - folder: Folder where results should be saved. If None, current working directory is used - normalize_weights: Whether to automatically normalize the weights of scenarios to sum up to 1 when solving - - Attributes: - clustering (Clustering | None): Contains the clustered time series data - clustering_model (ClusteringModel | None): Contains Variables and Constraints that equalize clusters of the time series data - """ - - def __init__( - self, - name: str, - flow_system: FlowSystem, - clustering_parameters: ClusteringParameters, - components_to_clusterize: list[Component] | None = None, - folder: pathlib.Path | None = None, - normalize_weights: bool = True, - ): - warnings.warn( - f'ClusteredOptimization is deprecated and will be removed in v{DEPRECATION_REMOVAL_VERSION}. 
' - 'Use FlowSystem.transform.cluster(params) followed by FlowSystem.optimize(solver) instead. ' - 'Example: clustered_fs = flow_system.transform.cluster(params); clustered_fs.optimize(solver)', - DeprecationWarning, - stacklevel=2, - ) - if flow_system.scenarios is not None: - raise ValueError('Clustering is not supported for scenarios yet. Please use Optimization instead.') - if flow_system.periods is not None: - raise ValueError('Clustering is not supported for periods yet. Please use Optimization instead.') - # Skip parent deprecation warning by calling common init directly - _initialize_optimization_common( - self, - name=name, - flow_system=flow_system, - folder=folder, - normalize_weights=normalize_weights, - ) - self.clustering_parameters = clustering_parameters - self.components_to_clusterize = components_to_clusterize - self.clustering: Clustering | None = None - self.clustering_model: ClusteringModel | None = None - - def do_modeling(self) -> ClusteredOptimization: - t_start = timeit.default_timer() - self.flow_system.connect_and_transform() - self._perform_clustering() - - # Model the System - self.model = self.flow_system.create_model(self.normalize_weights) - self.model.do_modeling() - # Add Clustering Submodel after modeling the rest - self.clustering_model = ClusteringModel( - self.model, self.clustering_parameters, self.flow_system, self.clustering, self.components_to_clusterize - ) - self.clustering_model.do_modeling() - self.durations['modeling'] = round(timeit.default_timer() - t_start, 2) - return self - - def _perform_clustering(self): - from .clustering import Clustering - - t_start_agg = timeit.default_timer() - - # Validation - dt_min = float(self.flow_system.hours_per_timestep.min().item()) - dt_max = float(self.flow_system.hours_per_timestep.max().item()) - if not dt_min == dt_max: - raise ValueError( - f'Clustering failed due to inconsistent time step sizes:delta_t varies from {dt_min} to {dt_max} hours.' - ) - ratio = self.clustering_parameters.hours_per_period / dt_max - if not np.isclose(ratio, round(ratio), atol=1e-9): - raise ValueError( - f'The selected {self.clustering_parameters.hours_per_period=} does not match the time ' - f'step size of {dt_max} hours. It must be an integer multiple of {dt_max} hours.' 
- ) - - logger.info(f'{"":#^80}') - logger.info(f'{" Clustering TimeSeries Data ":#^80}') - - ds = self.flow_system.to_dataset() - - temporaly_changing_ds = drop_constant_arrays(ds, dim='time') - - # Clustering - creation of clustered timeseries: - self.clustering = Clustering( - original_data=temporaly_changing_ds.to_dataframe(), - hours_per_time_step=float(dt_min), - hours_per_period=self.clustering_parameters.hours_per_period, - nr_of_periods=self.clustering_parameters.nr_of_periods, - weights=self.calculate_clustering_weights(temporaly_changing_ds), - time_series_for_high_peaks=self.clustering_parameters.labels_for_high_peaks, - time_series_for_low_peaks=self.clustering_parameters.labels_for_low_peaks, - ) - - self.clustering.cluster() - result = self.clustering.plot(show=CONFIG.Plotting.default_show) - result.to_html(self.folder / 'clustering.html') - if self.clustering_parameters.aggregate_data_and_fix_non_binary_vars: - ds = self.flow_system.to_dataset() - for name, series in self.clustering.aggregated_data.items(): - da = ( - DataConverter.to_dataarray(series, self.flow_system.coords) - .rename(name) - .assign_attrs(ds[name].attrs) - ) - if TimeSeriesData.is_timeseries_data(da): - da = TimeSeriesData.from_dataarray(da) - - ds[name] = da - - self.flow_system = FlowSystem.from_dataset(ds) - self.flow_system.connect_and_transform() - self.durations['clustering'] = round(timeit.default_timer() - t_start_agg, 2) - - @classmethod - def calculate_clustering_weights(cls, ds: xr.Dataset) -> dict[str, float]: - """Calculate weights for all datavars in the dataset. Weights are pulled from the attrs of the datavars.""" - groups = [da.attrs.get('clustering_group') for da in ds.data_vars.values() if 'clustering_group' in da.attrs] - group_counts = Counter(groups) - - # Calculate weight for each group (1/count) - group_weights = {group: 1 / count for group, count in group_counts.items()} - - weights = {} - for name, da in ds.data_vars.items(): - clustering_group = da.attrs.get('clustering_group') - group_weight = group_weights.get(clustering_group) - if group_weight is not None: - weights[name] = group_weight - else: - weights[name] = da.attrs.get('clustering_weight', 1) - - if np.all(np.isclose(list(weights.values()), 1, atol=1e-6)): - logger.info('All Clustering weights were set to 1') - - return weights - - class SegmentedOptimization: """Solve large optimization problems by dividing time horizon into (overlapping) segments. 
diff --git a/flixopt/plot_result.py b/flixopt/plot_result.py index 683fbcf3e..85e692602 100644 --- a/flixopt/plot_result.py +++ b/flixopt/plot_result.py @@ -41,7 +41,7 @@ class PlotResult: Customizing the figure: - >>> result = clustering.plot() + >>> result = clustering.plot.compare() >>> result.update(title='My Custom Title').show() """ diff --git a/flixopt/results.py b/flixopt/results.py index 16d88743a..8ec860244 100644 --- a/flixopt/results.py +++ b/flixopt/results.py @@ -99,7 +99,7 @@ class Results(CompositeContainerMixin['ComponentResults | BusResults | EffectRes buses: Dictionary mapping bus labels to BusResults objects effects: Dictionary mapping effect names to EffectResults objects timesteps_extra: Extended time index including boundary conditions - hours_per_timestep: Duration of each timestep for proper energy optimizations + timestep_duration: Duration of each timestep in hours for proper energy calculations Examples: Load and analyze saved results: @@ -285,7 +285,7 @@ def __init__( self.flows = ResultsContainer(elements=flows_dict, element_type_name='flow results', truncate_repr=10) self.timesteps_extra = self.solution.indexes['time'] - self.hours_per_timestep = FlowSystem.calculate_hours_per_timestep(self.timesteps_extra) + self.timestep_duration = FlowSystem.calculate_timestep_duration(self.timesteps_extra) self.scenarios = self.solution.indexes['scenario'] if 'scenario' in self.solution.indexes else None self.periods = self.solution.indexes['period'] if 'period' in self.solution.indexes else None @@ -623,7 +623,7 @@ def flow_hours( .. deprecated:: Use `results.plot.all_flow_hours` (Dataset) or - `results.flows['FlowLabel'].flow_rate * results.hours_per_timestep` instead. + `results.flows['FlowLabel'].flow_rate * results.timestep_duration` instead. 
**Note**: The new API differs from this method: @@ -675,7 +675,7 @@ def flow_hours( stacklevel=2, ) if self._flow_hours is None: - self._flow_hours = (self.flow_rates() * self.hours_per_timestep).rename('flow_hours') + self._flow_hours = (self.flow_rates() * self.timestep_duration).rename('flow_hours') filters = {k: v for k, v in {'start': start, 'end': end, 'component': component}.items() if v is not None} return filter_dataarray_by_coord(self._flow_hours, **filters) @@ -1577,14 +1577,14 @@ def plot_node_balance_pie( dpi = plot_kwargs.pop('dpi', None) # None uses CONFIG.Plotting.default_dpi inputs = sanitize_dataset( - ds=self.solution[self.inputs] * self._results.hours_per_timestep, + ds=self.solution[self.inputs] * self._results.timestep_duration, threshold=1e-5, drop_small_vars=True, zero_small_values=True, drop_suffix='|', ) outputs = sanitize_dataset( - ds=self.solution[self.outputs] * self._results.hours_per_timestep, + ds=self.solution[self.outputs] * self._results.timestep_duration, threshold=1e-5, drop_small_vars=True, zero_small_values=True, @@ -1715,7 +1715,7 @@ def node_balance( ds, _ = _apply_selection_to_data(ds, select=select, drop=True) if unit_type == 'flow_hours': - ds = ds * self._results.hours_per_timestep + ds = ds * self._results.timestep_duration ds = ds.rename_vars({var: var.replace('flow_rate', 'flow_hours') for var in ds.data_vars}) return ds @@ -2016,7 +2016,7 @@ def flow_rate(self) -> xr.DataArray: @property def flow_hours(self) -> xr.DataArray: - return (self.flow_rate * self._results.hours_per_timestep).rename(f'{self.label}|flow_hours') + return (self.flow_rate * self._results.timestep_duration).rename(f'{self.label}|flow_hours') @property def size(self) -> xr.DataArray: diff --git a/flixopt/statistics_accessor.py b/flixopt/statistics_accessor.py index 952047cc5..572363be8 100644 --- a/flixopt/statistics_accessor.py +++ b/flixopt/statistics_accessor.py @@ -471,7 +471,7 @@ def flow_hours(self) -> xr.Dataset: """ self._require_solution() if self._flow_hours is None: - hours = self._fs.hours_per_timestep + hours = self._fs.timestep_duration flow_rates = self.flow_rates # Multiply and preserve/transform attributes data_vars = {} @@ -784,9 +784,9 @@ def get_contributor_type(contributor: str) -> str: label = f'{contributor}->{source_effect}({current_mode})' if label in solution: da = solution[label] * factor - # For total mode, sum temporal over time + # For total mode, sum temporal over time (apply cluster_weight for proper weighting) if mode == 'total' and current_mode == 'temporal' and 'time' in da.dims: - da = da.sum('time') + da = (da * self._fs.cluster_weight).sum('time') if share_total is None: share_total = da else: diff --git a/flixopt/structure.py b/flixopt/structure.py index 88fd9ce31..d401451c1 100644 --- a/flixopt/structure.py +++ b/flixopt/structure.py @@ -42,6 +42,92 @@ CLASS_REGISTRY = {} +@dataclass +class TimeSeriesWeights: + """Unified weighting system for time series aggregation (PyPSA-inspired). + + This class provides a clean, unified interface for time series weights, + combining the various weight types used in flixopt into a single object. + + Attributes: + temporal: Combined weight for temporal operations (timestep_duration × cluster_weight). + Applied to all time-summing operations. dims: [time] or [time, period, scenario] + period: Weight for each period in multi-period optimization. + dims: [period] or None + scenario: Weight for each scenario in stochastic optimization. 
+ dims: [scenario] or None + objective: Optional override weight for objective function calculations. + If None, uses temporal weight. dims: [time] or [time, period, scenario] + storage: Optional override weight for storage balance equations. + If None, uses temporal weight. dims: [time] or [time, period, scenario] + + Example: + >>> # Access via FlowSystem + >>> weights = flow_system.weights + >>> weighted_sum = (flow_rate * weights.temporal).sum('time') + >>> + >>> # With period/scenario weighting + >>> total = weighted_sum * weights.period * weights.scenario + + Note: + For backwards compatibility, the existing properties (cluster_weight, + timestep_duration, aggregation_weight) are still available on FlowSystem + and FlowSystemModel. + """ + + temporal: xr.DataArray + period: xr.DataArray | None = None + scenario: xr.DataArray | None = None + objective: xr.DataArray | None = None + storage: xr.DataArray | None = None + + def __post_init__(self): + """Validate weights.""" + if not isinstance(self.temporal, xr.DataArray): + raise TypeError('temporal must be an xarray DataArray') + if 'time' not in self.temporal.dims: + raise ValueError("temporal must have 'time' dimension") + + @property + def effective_objective(self) -> xr.DataArray: + """Get effective objective weight (override or temporal).""" + return self.objective if self.objective is not None else self.temporal + + @property + def effective_storage(self) -> xr.DataArray: + """Get effective storage weight (override or temporal).""" + return self.storage if self.storage is not None else self.temporal + + def sum_over_time(self, data: xr.DataArray) -> xr.DataArray: + """Sum data over time dimension with proper weighting. + + Args: + data: DataArray with 'time' dimension. + + Returns: + Data summed over time with temporal weighting applied. + """ + if 'time' not in data.dims: + return data + return (data * self.temporal).sum('time') + + def apply_period_scenario_weights(self, data: xr.DataArray) -> xr.DataArray: + """Apply period and scenario weights to data. + + Args: + data: DataArray, optionally with 'period' and/or 'scenario' dims. + + Returns: + Data with period and scenario weights applied. + """ + result = data + if self.period is not None and 'period' in data.dims: + result = result * self.period + if self.scenario is not None and 'scenario' in data.dims: + result = result * self.scenario + return result + + def register_class_for_io(cls): """Register a class for serialization/deserialization.""" name = cls.__name__ @@ -209,13 +295,32 @@ def solution(self): return solution @property - def hours_per_step(self): - return self.flow_system.hours_per_timestep + def timestep_duration(self) -> xr.DataArray: + """Duration of each timestep in hours.""" + return self.flow_system.timestep_duration @property def hours_of_previous_timesteps(self): return self.flow_system.hours_of_previous_timesteps + @property + def cluster_weight(self) -> xr.DataArray: + """Cluster weight for cluster() optimization. + + Represents how many original timesteps each cluster represents. + Default is 1.0 for all timesteps. + """ + return self.flow_system.cluster_weight + + @property + def aggregation_weight(self) -> xr.DataArray: + """Combined weight for time aggregation. + + Combines timestep_duration (physical duration) and cluster_weight (cluster representation). + Use this for proper time aggregation in clustered models. 
+ """ + return self.timestep_duration * self.cluster_weight + @property def scenario_weights(self) -> xr.DataArray: """ @@ -1703,8 +1808,8 @@ def __repr__(self) -> str: return f'{model_string}\n{"=" * len(model_string)}\n\n{all_sections}' @property - def hours_per_step(self): - return self._model.hours_per_step + def timestep_duration(self): + return self._model.timestep_duration def _do_modeling(self): """ diff --git a/flixopt/transform_accessor.py b/flixopt/transform_accessor.py index eaec1a3b6..a209ce4ab 100644 --- a/flixopt/transform_accessor.py +++ b/flixopt/transform_accessor.py @@ -11,13 +11,11 @@ from collections import defaultdict from typing import TYPE_CHECKING, Any, Literal +import numpy as np import pandas as pd import xarray as xr if TYPE_CHECKING: - import numpy as np - - from .clustering import ClusteringParameters from .flow_system import FlowSystem logger = logging.getLogger('flixopt') @@ -31,11 +29,11 @@ class TransformAccessor: with modified structure or data, accessible via `flow_system.transform`. Examples: - Clustered optimization: + Time series aggregation (8 typical days): - >>> clustered_fs = flow_system.transform.cluster(params) - >>> clustered_fs.optimize(solver) - >>> print(clustered_fs.solution) + >>> reduced_fs = flow_system.transform.cluster(n_clusters=8, cluster_duration='1D') + >>> reduced_fs.optimize(solver) + >>> expanded_fs = reduced_fs.transform.expand_solution() Future MGA: @@ -52,126 +50,6 @@ def __init__(self, flow_system: FlowSystem) -> None: """ self._fs = flow_system - def cluster( - self, - parameters: ClusteringParameters, - components_to_clusterize: list | None = None, - ) -> FlowSystem: - """ - Create a clustered FlowSystem for time series aggregation. - - This method creates a new FlowSystem that can be optimized with - clustered time series data. The clustering reduces computational - complexity by identifying representative time periods. - - The returned FlowSystem: - - Has the same timesteps as the original (clustering works via constraints, not reduction) - - Has aggregated time series data (if `aggregate_data_and_fix_non_binary_vars=True`) - - Will have clustering constraints added during `build_model()` - - Args: - parameters: Clustering parameters specifying period duration, - number of periods, and aggregation settings. - components_to_clusterize: List of components to apply clustering to. - If None, all components are clustered. - - Returns: - A new FlowSystem configured for clustered optimization. - - Raises: - ValueError: If timestep sizes are inconsistent. - ValueError: If hours_per_period is not a multiple of timestep size. - - Examples: - Basic clustered optimization: - - >>> from flixopt import ClusteringParameters - >>> params = ClusteringParameters( - ... hours_per_period=24, - ... nr_of_periods=8, - ... fix_storage_flows=True, - ... aggregate_data_and_fix_non_binary_vars=True, - ... ) - >>> clustered_fs = flow_system.transform.cluster(params) - >>> clustered_fs.optimize(solver) - >>> print(clustered_fs.solution) - - With model modifications: - - >>> clustered_fs = flow_system.transform.cluster(params) - >>> clustered_fs.build_model() - >>> clustered_fs.model.add_constraints(...) 
- >>> clustered_fs.solve(solver) - """ - import numpy as np - - from .clustering import Clustering - from .core import DataConverter, TimeSeriesData, drop_constant_arrays - - # Validation - dt_min = float(self._fs.hours_per_timestep.min().item()) - dt_max = float(self._fs.hours_per_timestep.max().item()) - if dt_min != dt_max: - raise ValueError( - f'Clustering failed due to inconsistent time step sizes: ' - f'delta_t varies from {dt_min} to {dt_max} hours.' - ) - ratio = parameters.hours_per_period / dt_max - if not np.isclose(ratio, round(ratio), atol=1e-9): - raise ValueError( - f'The selected hours_per_period={parameters.hours_per_period} does not match the time ' - f'step size of {dt_max} hours. It must be an integer multiple of {dt_max} hours.' - ) - - logger.info(f'{"":#^80}') - logger.info(f'{" Clustering TimeSeries Data ":#^80}') - - # Get dataset representation - ds = self._fs.to_dataset() - temporaly_changing_ds = drop_constant_arrays(ds, dim='time') - - # Perform clustering - clustering = Clustering( - original_data=temporaly_changing_ds.to_dataframe(), - hours_per_time_step=float(dt_min), - hours_per_period=parameters.hours_per_period, - nr_of_periods=parameters.nr_of_periods, - weights=self._calculate_clustering_weights(temporaly_changing_ds), - time_series_for_high_peaks=parameters.labels_for_high_peaks, - time_series_for_low_peaks=parameters.labels_for_low_peaks, - ) - clustering.cluster() - - # Create new FlowSystem (with aggregated data if requested) - if parameters.aggregate_data_and_fix_non_binary_vars: - # Note: A second to_dataset() call is required here because: - # 1. The first 'ds' (line 124) was processed by drop_constant_arrays() - # 2. We need the full unprocessed dataset to apply aggregated data modifications - # 3. The clustering used 'temporaly_changing_ds' for input, not the full 'ds' - ds = self._fs.to_dataset() - for name, series in clustering.aggregated_data.items(): - da = DataConverter.to_dataarray(series, self._fs.coords).rename(name).assign_attrs(ds[name].attrs) - if TimeSeriesData.is_timeseries_data(da): - da = TimeSeriesData.from_dataarray(da) - ds[name] = da - - from .flow_system import FlowSystem - - clustered_fs = FlowSystem.from_dataset(ds) - else: - # Copy without data modification - clustered_fs = self._fs.copy() - - # Store clustering info for later use - clustered_fs._clustering_info = { - 'parameters': parameters, - 'clustering': clustering, - 'components_to_clusterize': components_to_clusterize, - 'original_fs': self._fs, - } - - return clustered_fs - @staticmethod def _calculate_clustering_weights(ds) -> dict[str, float]: """Calculate weights for clustering based on dataset attributes.""" @@ -696,8 +574,531 @@ def fix_sizes( return new_fs - # Future methods can be added here: - # - # def mga(self, alternatives: int = 5) -> FlowSystem: - # """Create a FlowSystem configured for Modeling to Generate Alternatives.""" - # ... + def cluster( + self, + n_clusters: int, + cluster_duration: str | float, + weights: dict[str, float] | None = None, + time_series_for_high_peaks: list[str] | None = None, + time_series_for_low_peaks: list[str] | None = None, + ) -> FlowSystem: + """ + Create a FlowSystem with reduced timesteps using typical clusters. + + This method creates a new FlowSystem optimized for sizing studies by reducing + the number of timesteps to only the typical (representative) clusters identified + through time series aggregation using the tsam package. + + The method: + 1. Performs time series clustering using tsam (k-means) + 2. 
Extracts only the typical clusters (not all original timesteps) + 3. Applies timestep weighting for accurate cost representation + 4. Handles storage states between clusters based on each Storage's ``cluster_mode`` + + Use this for initial sizing optimization, then use ``fix_sizes()`` to re-optimize + at full resolution for accurate dispatch results. + + Args: + n_clusters: Number of clusters (typical periods) to extract (e.g., 8 typical days). + cluster_duration: Duration of each cluster. Can be a pandas-style string + ('1D', '24h', '6h') or a numeric value in hours. + weights: Optional clustering weights per time series. Keys are time series labels. + time_series_for_high_peaks: Time series labels for explicitly selecting high-value + clusters. **Recommended** for demand time series to capture peak demand days. + time_series_for_low_peaks: Time series labels for explicitly selecting low-value clusters. + + Returns: + A new FlowSystem with reduced timesteps (only typical clusters). + The FlowSystem has metadata stored in ``clustering`` for expansion. + + Raises: + ValueError: If timestep sizes are inconsistent. + ValueError: If cluster_duration is not a multiple of timestep size. + + Examples: + Two-stage sizing optimization: + + >>> # Stage 1: Size with reduced timesteps (fast) + >>> fs_sizing = flow_system.transform.cluster( + ... n_clusters=8, + ... cluster_duration='1D', + ... time_series_for_high_peaks=['HeatDemand(Q_th)|fixed_relative_profile'], + ... ) + >>> fs_sizing.optimize(solver) + >>> + >>> # Apply safety margin (typical clusters may smooth peaks) + >>> sizes_with_margin = { + ... name: float(size.item()) * 1.05 for name, size in fs_sizing.statistics.sizes.items() + ... } + >>> + >>> # Stage 2: Fix sizes and re-optimize at full resolution + >>> fs_dispatch = flow_system.transform.fix_sizes(sizes_with_margin) + >>> fs_dispatch.optimize(solver) + + Note: + - This is best suited for initial sizing, not final dispatch optimization + - Use ``time_series_for_high_peaks`` to ensure peak demand clusters are captured + - A 5-10% safety margin on sizes is recommended for the dispatch stage + - For seasonal storage (e.g., hydrogen, thermal storage), set + ``Storage.cluster_mode='intercluster'`` or ``'intercluster_cyclic'`` + """ + import tsam.timeseriesaggregation as tsam + + from .clustering import Clustering, ClusterResult, ClusterStructure + from .core import TimeSeriesData, drop_constant_arrays + from .flow_system import FlowSystem + + # Parse cluster_duration to hours + hours_per_cluster = ( + pd.Timedelta(cluster_duration).total_seconds() / 3600 + if isinstance(cluster_duration, str) + else float(cluster_duration) + ) + + # Validation + dt = float(self._fs.timestep_duration.min().item()) + if not np.isclose(dt, float(self._fs.timestep_duration.max().item())): + raise ValueError( + f'cluster() requires uniform timestep sizes, got min={dt}h, ' + f'max={float(self._fs.timestep_duration.max().item())}h.' 
+ ) + if not np.isclose(hours_per_cluster / dt, round(hours_per_cluster / dt), atol=1e-9): + raise ValueError(f'cluster_duration={hours_per_cluster}h must be a multiple of timestep size ({dt}h).') + + timesteps_per_cluster = int(round(hours_per_cluster / dt)) + has_periods = self._fs.periods is not None + has_scenarios = self._fs.scenarios is not None + + logger.info(f'{"":#^80}') + logger.info(f'{" Creating Typical Clusters ":#^80}') + + # Determine iteration dimensions + periods = list(self._fs.periods) if has_periods else [None] + scenarios = list(self._fs.scenarios) if has_scenarios else [None] + + ds = self._fs.to_dataset(include_solution=False) + + # Cluster each (period, scenario) combination using tsam directly + tsam_results: dict[tuple, tsam.TimeSeriesAggregation] = {} + cluster_orders: dict[tuple, np.ndarray] = {} + cluster_occurrences_all: dict[tuple, dict] = {} + use_extreme_periods = bool(time_series_for_high_peaks or time_series_for_low_peaks) + + for period_label in periods: + for scenario_label in scenarios: + key = (period_label, scenario_label) + selector = {k: v for k, v in [('period', period_label), ('scenario', scenario_label)] if v is not None} + ds_slice = ds.sel(**selector, drop=True) if selector else ds + temporaly_changing_ds = drop_constant_arrays(ds_slice, dim='time') + df = temporaly_changing_ds.to_dataframe() + + if selector: + logger.info(f'Clustering {", ".join(f"{k}={v}" for k, v in selector.items())}...') + + # Use tsam directly + clustering_weights = weights or self._calculate_clustering_weights(temporaly_changing_ds) + tsam_agg = tsam.TimeSeriesAggregation( + df, + noTypicalPeriods=n_clusters, + hoursPerPeriod=hours_per_cluster, + resolution=dt, + clusterMethod='k_means', + extremePeriodMethod='new_cluster_center' if use_extreme_periods else 'None', + weightDict={name: w for name, w in clustering_weights.items() if name in df.columns}, + addPeakMax=time_series_for_high_peaks or [], + addPeakMin=time_series_for_low_peaks or [], + ) + tsam_agg.createTypicalPeriods() + + tsam_results[key] = tsam_agg + cluster_orders[key] = tsam_agg.clusterOrder + cluster_occurrences_all[key] = tsam_agg.clusterPeriodNoOccur + + # Use first result for structure + first_key = (periods[0], scenarios[0]) + first_tsam = tsam_results[first_key] + n_reduced_timesteps = len(first_tsam.typicalPeriods) + actual_n_clusters = len(first_tsam.clusterPeriodNoOccur) + + # Create new time index (needed for weights and typical periods) + new_time_index = pd.date_range( + start=self._fs.timesteps[0], periods=n_reduced_timesteps, freq=pd.Timedelta(hours=dt) + ) + + # Create timestep weights from cluster occurrences (per period/scenario) + def _build_weights_for_key(key: tuple) -> xr.DataArray: + occurrences = cluster_occurrences_all[key] + weights = np.repeat([occurrences.get(c, 1) for c in range(actual_n_clusters)], timesteps_per_cluster) + return xr.DataArray(weights, dims=['time'], coords={'time': new_time_index}) + + # Build weights - use _combine_slices_to_dataarray for consistent multi-dim handling + weights_slices = {key: _build_weights_for_key(key) for key in cluster_occurrences_all} + # Create a dummy 1D DataArray as template for _combine_slices_to_dataarray + dummy_template = xr.DataArray(np.zeros(n_reduced_timesteps), dims=['time']) + timestep_weights = self._combine_slices_to_dataarray( + weights_slices, dummy_template, new_time_index, periods, scenarios + ) + + logger.info(f'Reduced from {len(self._fs.timesteps)} to {n_reduced_timesteps} timesteps') + logger.info(f'Clusters: 
{actual_n_clusters} (requested: {n_clusters})') + + # Build typical periods DataArrays keyed by (variable_name, (period, scenario)) + typical_das: dict[str, dict[tuple, xr.DataArray]] = {} + for key, tsam_agg in tsam_results.items(): + typical_df = tsam_agg.typicalPeriods + for col in typical_df.columns: + typical_das.setdefault(col, {})[key] = xr.DataArray( + typical_df[col].values, dims=['time'], coords={'time': new_time_index} + ) + + # Build reduced dataset + all_keys = {(p, s) for p in periods for s in scenarios} + ds_new_vars = {} + for name, original_da in ds.data_vars.items(): + if 'time' not in original_da.dims: + ds_new_vars[name] = original_da.copy() + elif name not in typical_das or set(typical_das[name].keys()) != all_keys: + # Time-dependent but constant (or not present in all clustering results): slice to new time length + ds_new_vars[name] = original_da.isel(time=slice(0, n_reduced_timesteps)).assign_coords( + time=new_time_index + ) + else: + # Time-varying: combine per-(period, scenario) slices + da = self._combine_slices_to_dataarray( + slices=typical_das[name], + original_da=original_da, + new_time_index=new_time_index, + periods=periods, + scenarios=scenarios, + ) + if TimeSeriesData.is_timeseries_data(original_da): + da = TimeSeriesData.from_dataarray(da.assign_attrs(original_da.attrs)) + ds_new_vars[name] = da + + ds_new = xr.Dataset(ds_new_vars, attrs=ds.attrs) + ds_new.attrs['timesteps_per_cluster'] = timesteps_per_cluster + ds_new.attrs['timestep_duration'] = dt + + reduced_fs = FlowSystem.from_dataset(ds_new) + # Set cluster_weight - might have period/scenario dimensions + reduced_fs.cluster_weight = reduced_fs.fit_to_model_coords( + 'cluster_weight', timestep_weights, dims=['scenario', 'period', 'time'] + ) + + # Remove 'equals_final' from storages - doesn't make sense on reduced timesteps + # Set to None so initial SOC is free (handled by storage_mode constraints) + for storage in reduced_fs.storages.values(): + ics = storage.initial_charge_state + if isinstance(ics, str) and ics == 'equals_final': + storage.initial_charge_state = None + + # Build Clustering for inter-cluster linking and solution expansion + n_original_timesteps = len(self._fs.timesteps) + + # Build per-slice cluster_order and timestep_mapping as multi-dimensional DataArrays + # This is needed because each (period, scenario) combination may have different clustering + + def _build_timestep_mapping_for_key(key: tuple) -> np.ndarray: + """Build timestep_mapping for a single (period, scenario) slice.""" + mapping = np.zeros(n_original_timesteps, dtype=np.int32) + for period_idx, cluster_id in enumerate(cluster_orders[key]): + for pos in range(timesteps_per_cluster): + original_idx = period_idx * timesteps_per_cluster + pos + if original_idx < n_original_timesteps: + representative_idx = cluster_id * timesteps_per_cluster + pos + mapping[original_idx] = representative_idx + return mapping + + def _build_cluster_occurrences_for_key(key: tuple) -> np.ndarray: + """Build cluster_occurrences array for a single (period, scenario) slice.""" + occurrences = cluster_occurrences_all[key] + return np.array([occurrences.get(c, 0) for c in range(actual_n_clusters)]) + + # Build multi-dimensional arrays + if has_periods or has_scenarios: + # Multi-dimensional case: build arrays for each (period, scenario) combination + # cluster_order: dims [original_period, period?, scenario?] 
+ cluster_order_slices = {} + timestep_mapping_slices = {} + cluster_occurrences_slices = {} + + for p in periods: + for s in scenarios: + key = (p, s) + cluster_order_slices[key] = xr.DataArray( + cluster_orders[key], dims=['original_period'], name='cluster_order' + ) + timestep_mapping_slices[key] = xr.DataArray( + _build_timestep_mapping_for_key(key), dims=['original_time'], name='timestep_mapping' + ) + cluster_occurrences_slices[key] = xr.DataArray( + _build_cluster_occurrences_for_key(key), dims=['cluster'], name='cluster_occurrences' + ) + + # Combine slices into multi-dimensional DataArrays + cluster_order_da = self._combine_slices_to_dataarray_generic( + cluster_order_slices, ['original_period'], periods, scenarios, 'cluster_order' + ) + timestep_mapping_da = self._combine_slices_to_dataarray_generic( + timestep_mapping_slices, ['original_time'], periods, scenarios, 'timestep_mapping' + ) + cluster_occurrences_da = self._combine_slices_to_dataarray_generic( + cluster_occurrences_slices, ['cluster'], periods, scenarios, 'cluster_occurrences' + ) + else: + # Simple case: single (None, None) slice + cluster_order_da = xr.DataArray(cluster_orders[first_key], dims=['original_period'], name='cluster_order') + timestep_mapping_da = xr.DataArray( + _build_timestep_mapping_for_key(first_key), dims=['original_time'], name='timestep_mapping' + ) + cluster_occurrences_da = xr.DataArray( + _build_cluster_occurrences_for_key(first_key), dims=['cluster'], name='cluster_occurrences' + ) + + cluster_structure = ClusterStructure( + cluster_order=cluster_order_da, + cluster_occurrences=cluster_occurrences_da, + n_clusters=actual_n_clusters, + timesteps_per_cluster=timesteps_per_cluster, + ) + + aggregation_result = ClusterResult( + timestep_mapping=timestep_mapping_da, + n_representatives=n_reduced_timesteps, + representative_weights=timestep_weights.rename('representative_weights'), + cluster_structure=cluster_structure, + original_data=ds, + aggregated_data=ds_new, + ) + + reduced_fs.clustering = Clustering( + result=aggregation_result, + original_flow_system=self._fs, + backend_name='tsam', + ) + + return reduced_fs + + @staticmethod + def _combine_slices_to_dataarray( + slices: dict[tuple, xr.DataArray], + original_da: xr.DataArray, + new_time_index: pd.DatetimeIndex, + periods: list, + scenarios: list, + ) -> xr.DataArray: + """Combine per-(period, scenario) slices into a multi-dimensional DataArray using xr.concat. + + Args: + slices: Dict mapping (period, scenario) tuples to 1D DataArrays (time only). + original_da: Original DataArray to get dimension order and attrs from. + new_time_index: New time coordinate for the output. + periods: List of period labels ([None] if no periods dimension). + scenarios: List of scenario labels ([None] if no scenarios dimension). + + Returns: + DataArray with dimensions matching original_da but reduced time. 
+ """ + first_key = (periods[0], scenarios[0]) + has_periods = periods != [None] + has_scenarios = scenarios != [None] + + # Simple case: no period/scenario dimensions + if not has_periods and not has_scenarios: + return slices[first_key].assign_attrs(original_da.attrs) + + # Multi-dimensional: use xr.concat to stack along period/scenario dims + if has_periods and has_scenarios: + # Stack scenarios first, then periods + period_arrays = [] + for p in periods: + scenario_arrays = [slices[(p, s)] for s in scenarios] + period_arrays.append(xr.concat(scenario_arrays, dim=pd.Index(scenarios, name='scenario'))) + result = xr.concat(period_arrays, dim=pd.Index(periods, name='period')) + elif has_periods: + result = xr.concat([slices[(p, None)] for p in periods], dim=pd.Index(periods, name='period')) + else: + result = xr.concat([slices[(None, s)] for s in scenarios], dim=pd.Index(scenarios, name='scenario')) + + # Put time dimension first (standard order), preserve other dims + result = result.transpose('time', ...) + + return result.assign_attrs(original_da.attrs) + + @staticmethod + def _combine_slices_to_dataarray_generic( + slices: dict[tuple, xr.DataArray], + base_dims: list[str], + periods: list, + scenarios: list, + name: str, + ) -> xr.DataArray: + """Combine per-(period, scenario) slices into a multi-dimensional DataArray. + + Generic version that works with any base dimension (not just 'time'). + + Args: + slices: Dict mapping (period, scenario) tuples to DataArrays. + base_dims: Base dimensions of each slice (e.g., ['original_period'] or ['original_time']). + periods: List of period labels ([None] if no periods dimension). + scenarios: List of scenario labels ([None] if no scenarios dimension). + name: Name for the resulting DataArray. + + Returns: + DataArray with dimensions [base_dims..., period?, scenario?]. + """ + first_key = (periods[0], scenarios[0]) + has_periods = periods != [None] + has_scenarios = scenarios != [None] + + # Simple case: no period/scenario dimensions + if not has_periods and not has_scenarios: + return slices[first_key].rename(name) + + # Multi-dimensional: use xr.concat to stack along period/scenario dims + if has_periods and has_scenarios: + # Stack scenarios first, then periods + period_arrays = [] + for p in periods: + scenario_arrays = [slices[(p, s)] for s in scenarios] + period_arrays.append(xr.concat(scenario_arrays, dim=pd.Index(scenarios, name='scenario'))) + result = xr.concat(period_arrays, dim=pd.Index(periods, name='period')) + elif has_periods: + result = xr.concat([slices[(p, None)] for p in periods], dim=pd.Index(periods, name='period')) + else: + result = xr.concat([slices[(None, s)] for s in scenarios], dim=pd.Index(scenarios, name='scenario')) + + # Put base dimension first (standard order) + result = result.transpose(base_dims[0], ...) + + return result.rename(name) + + def expand_solution(self) -> FlowSystem: + """Expand a reduced (clustered) FlowSystem back to full original timesteps. + + After solving a FlowSystem created with ``cluster()``, this method + disaggregates the FlowSystem by: + 1. Expanding all time series data from typical clusters to full timesteps + 2. Expanding the solution by mapping each typical cluster back to all + original segments it represents + + For FlowSystems with periods and/or scenarios, each (period, scenario) + combination is expanded using its own cluster assignment. + + This enables using all existing solution accessors (``statistics``, ``plot``, etc.) 
+ with full time resolution, where both the data and solution are consistently + expanded from the typical clusters. + + Returns: + FlowSystem: A new FlowSystem with full timesteps and expanded solution. + + Raises: + ValueError: If the FlowSystem was not created with ``cluster()``. + ValueError: If the FlowSystem has no solution. + + Examples: + Two-stage optimization with solution expansion: + + >>> # Stage 1: Size with reduced timesteps + >>> fs_reduced = flow_system.transform.cluster( + ... n_clusters=8, + ... cluster_duration='1D', + ... ) + >>> fs_reduced.optimize(solver) + >>> + >>> # Expand to full resolution FlowSystem + >>> fs_expanded = fs_reduced.transform.expand_solution() + >>> + >>> # Use all existing accessors with full timesteps + >>> fs_expanded.statistics.flow_rates # Full 8760 timesteps + >>> fs_expanded.statistics.plot.balance('HeatBus') # Full resolution plots + >>> fs_expanded.statistics.plot.heatmap('Boiler(Q_th)|flow_rate') + + Note: + The expanded FlowSystem repeats the typical cluster values for all + segments belonging to the same cluster. Both input data and solution + are consistently expanded, so they match. This is an approximation - + the actual dispatch at full resolution would differ due to + intra-cluster variations in time series data. + + For accurate dispatch results, use ``fix_sizes()`` to fix the sizes + from the reduced optimization and re-optimize at full resolution. + """ + from .flow_system import FlowSystem + + # Validate + if self._fs.clustering is None: + raise ValueError( + 'expand_solution() requires a FlowSystem created with cluster(). ' + 'This FlowSystem has no aggregation info.' + ) + if self._fs.solution is None: + raise ValueError('FlowSystem has no solution. Run optimize() or solve() first.') + + info = self._fs.clustering + cluster_structure = info.result.cluster_structure + if cluster_structure is None: + raise ValueError('No cluster structure available for expansion.') + + timesteps_per_cluster = cluster_structure.timesteps_per_cluster + original_fs: FlowSystem = info.original_flow_system + n_clusters = ( + int(cluster_structure.n_clusters) + if isinstance(cluster_structure.n_clusters, (int, np.integer)) + else int(cluster_structure.n_clusters.values) + ) + has_periods = original_fs.periods is not None + has_scenarios = original_fs.scenarios is not None + + periods = list(original_fs.periods) if has_periods else [None] + scenarios = list(original_fs.scenarios) if has_scenarios else [None] + + original_timesteps = original_fs.timesteps + n_original_timesteps = len(original_timesteps) + n_reduced_timesteps = n_clusters * timesteps_per_cluster + + # Expand function using ClusterResult.expand_data() - handles multi-dimensional cases + def expand_da(da: xr.DataArray) -> xr.DataArray: + if 'time' not in da.dims: + return da.copy() + return info.result.expand_data(da, original_time=original_timesteps) + + # 1. 
Expand FlowSystem data (with cluster_weight set to 1.0 for all timesteps) + reduced_ds = self._fs.to_dataset(include_solution=False) + expanded_ds = xr.Dataset( + {name: expand_da(da) for name, da in reduced_ds.data_vars.items() if name != 'cluster_weight'}, + attrs=reduced_ds.attrs, + ) + expanded_ds.attrs['timestep_duration'] = original_fs.timestep_duration.values.tolist() + + # Create cluster_weight with value 1.0 for all timesteps (no weighting needed for expanded) + # Use _combine_slices_to_dataarray for consistent multi-dim handling + ones_da = xr.DataArray(np.ones(n_original_timesteps), dims=['time'], coords={'time': original_timesteps}) + ones_slices = {(p, s): ones_da for p in periods for s in scenarios} + cluster_weight = self._combine_slices_to_dataarray( + ones_slices, ones_da, original_timesteps, periods, scenarios + ).rename('cluster_weight') + expanded_ds['cluster_weight'] = cluster_weight + + expanded_fs = FlowSystem.from_dataset(expanded_ds) + + # 2. Expand solution + reduced_solution = self._fs.solution + expanded_fs._solution = xr.Dataset( + {name: expand_da(da) for name, da in reduced_solution.data_vars.items()}, + attrs=reduced_solution.attrs, + ) + + n_combinations = len(periods) * len(scenarios) + n_original_segments = cluster_structure.n_original_periods + logger.info( + f'Expanded FlowSystem from {n_reduced_timesteps} to {n_original_timesteps} timesteps ' + f'({n_clusters} clusters' + + ( + f', {n_combinations} period/scenario combinations)' + if n_combinations > 1 + else f' → {n_original_segments} original segments)' + ) + ) + + return expanded_fs diff --git a/mkdocs.yml b/mkdocs.yml index 551fac523..493937983 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -69,7 +69,7 @@ nav: - Piecewise Effects: notebooks/06c-piecewise-effects.ipynb - Scaling: - Scenarios: notebooks/07-scenarios-and-periods.ipynb - - Aggregation: notebooks/08a-aggregation.ipynb + - Clustering: notebooks/08a-aggregation.ipynb - Rolling Horizon: notebooks/08b-rolling-horizon.ipynb - Results: - Plotting: notebooks/09-plotting-and-data-access.ipynb diff --git a/tests/deprecated/examples/03_Optimization_modes/example_optimization_modes.py b/tests/deprecated/examples/03_Optimization_modes/example_optimization_modes.py index e85c4329e..91bbaeaaa 100644 --- a/tests/deprecated/examples/03_Optimization_modes/example_optimization_modes.py +++ b/tests/deprecated/examples/03_Optimization_modes/example_optimization_modes.py @@ -33,15 +33,10 @@ def get_solutions(optimizations: list, variable: str) -> xr.Dataset: # Segmented Properties segment_length, overlap_length = 96, 1 - # Aggregated Properties - clustering_parameters = fx.ClusteringParameters( - hours_per_period=6, - nr_of_periods=4, - fix_storage_flows=False, - aggregate_data_and_fix_non_binary_vars=True, - percentage_of_period_freedom=0, - penalty_of_period_freedom=0, - ) + # Clustering Properties + n_clusters = 4 + cluster_duration = '6h' + include_storage = False keep_extreme_periods = True imbalance_penalty = 1e5 # or set to None if not needed @@ -195,12 +190,27 @@ def get_solutions(optimizations: list, variable: str) -> xr.Dataset: optimizations.append(optimization) if aggregated: - if keep_extreme_periods: - clustering_parameters.time_series_for_high_peaks = [TS_heat_demand] - clustering_parameters.time_series_for_low_peaks = [TS_electricity_demand, TS_heat_demand] - optimization = fx.ClusteredOptimization('Aggregated', flow_system.copy(), clustering_parameters) - optimization.do_modeling() - optimization.solve(fx.solvers.HighsSolver(0.01 / 100, 60)) + 
# Use the new transform.cluster() API + time_series_for_high_peaks = [TS_heat_demand] if keep_extreme_periods else None + time_series_for_low_peaks = [TS_electricity_demand, TS_heat_demand] if keep_extreme_periods else None + + clustered_fs = flow_system.copy().transform.cluster( + n_clusters=n_clusters, + cluster_duration=cluster_duration, + include_storage=include_storage, + time_series_for_high_peaks=time_series_for_high_peaks, + time_series_for_low_peaks=time_series_for_low_peaks, + ) + clustered_fs.optimize(fx.solvers.HighsSolver(0.01 / 100, 60)) + + # Wrap in a simple object for compatibility with comparison code + class ClusteredResult: + def __init__(self, name, fs): + self.name = name + self.flow_system = fs + self.durations = {'total': 0} # Placeholder + + optimization = ClusteredResult('Clustered', clustered_fs) optimizations.append(optimization) # --- Plotting for comparison --- diff --git a/tests/deprecated/test_bus.py b/tests/deprecated/test_bus.py index cc49a2073..9bb7ddbe3 100644 --- a/tests/deprecated/test_bus.py +++ b/tests/deprecated/test_bus.py @@ -74,8 +74,8 @@ def test_bus_penalty(self, basic_flow_system_linopy_coords, coords_config): assert_conequal( model.constraints['TestBus->Penalty(temporal)'], model.variables['TestBus->Penalty(temporal)'] - == model.variables['TestBus|virtual_supply'] * 1e5 * model.hours_per_step - + model.variables['TestBus|virtual_demand'] * 1e5 * model.hours_per_step, + == model.variables['TestBus|virtual_supply'] * 1e5 * model.timestep_duration + + model.variables['TestBus|virtual_demand'] * 1e5 * model.timestep_duration, ) def test_bus_with_coords(self, basic_flow_system_linopy_coords, coords_config): diff --git a/tests/deprecated/test_effect.py b/tests/deprecated/test_effect.py index b3bb278f0..1cf625c1b 100644 --- a/tests/deprecated/test_effect.py +++ b/tests/deprecated/test_effect.py @@ -130,8 +130,8 @@ def test_bounds(self, basic_flow_system_linopy_coords, coords_config): assert_var_equal( model.variables['Effect1(temporal)|per_timestep'], model.add_variables( - lower=4.0 * model.hours_per_step, - upper=4.1 * model.hours_per_step, + lower=4.0 * model.timestep_duration, + upper=4.1 * model.timestep_duration, coords=model.get_coords(['time', 'period', 'scenario']), ), ) diff --git a/tests/deprecated/test_flow.py b/tests/deprecated/test_flow.py index 594bc1fbb..8e1ce1f53 100644 --- a/tests/deprecated/test_flow.py +++ b/tests/deprecated/test_flow.py @@ -23,7 +23,7 @@ def test_flow_minimal(self, basic_flow_system_linopy_coords, coords_config): assert_conequal( model.constraints['Sink(Wärme)|total_flow_hours'], flow.submodel.variables['Sink(Wärme)|total_flow_hours'] - == (flow.submodel.variables['Sink(Wärme)|flow_rate'] * model.hours_per_step).sum('time'), + == (flow.submodel.variables['Sink(Wärme)|flow_rate'] * model.timestep_duration).sum('time'), ) assert_var_equal(flow.submodel.flow_rate, model.add_variables(lower=0, upper=100, coords=model.get_coords())) assert_var_equal( @@ -61,7 +61,7 @@ def test_flow(self, basic_flow_system_linopy_coords, coords_config): assert_conequal( model.constraints['Sink(Wärme)|total_flow_hours'], flow.submodel.variables['Sink(Wärme)|total_flow_hours'] - == (flow.submodel.variables['Sink(Wärme)|flow_rate'] * model.hours_per_step).sum('time'), + == (flow.submodel.variables['Sink(Wärme)|flow_rate'] * model.timestep_duration).sum('time'), ) assert_var_equal( @@ -83,12 +83,12 @@ def test_flow(self, basic_flow_system_linopy_coords, coords_config): assert_conequal( model.constraints['Sink(Wärme)|load_factor_min'], - 
flow.submodel.variables['Sink(Wärme)|total_flow_hours'] >= model.hours_per_step.sum('time') * 0.1 * 100, + flow.submodel.variables['Sink(Wärme)|total_flow_hours'] >= model.timestep_duration.sum('time') * 0.1 * 100, ) assert_conequal( model.constraints['Sink(Wärme)|load_factor_max'], - flow.submodel.variables['Sink(Wärme)|total_flow_hours'] <= model.hours_per_step.sum('time') * 0.9 * 100, + flow.submodel.variables['Sink(Wärme)|total_flow_hours'] <= model.timestep_duration.sum('time') * 0.9 * 100, ) assert_sets_equal( @@ -129,13 +129,13 @@ def test_effects_per_flow_hour(self, basic_flow_system_linopy_coords, coords_con assert_conequal( model.constraints['Sink(Wärme)->costs(temporal)'], model.variables['Sink(Wärme)->costs(temporal)'] - == flow.submodel.variables['Sink(Wärme)|flow_rate'] * model.hours_per_step * costs_per_flow_hour, + == flow.submodel.variables['Sink(Wärme)|flow_rate'] * model.timestep_duration * costs_per_flow_hour, ) assert_conequal( model.constraints['Sink(Wärme)->CO2(temporal)'], model.variables['Sink(Wärme)->CO2(temporal)'] - == flow.submodel.variables['Sink(Wärme)|flow_rate'] * model.hours_per_step * co2_per_flow_hour, + == flow.submodel.variables['Sink(Wärme)|flow_rate'] * model.timestep_duration * co2_per_flow_hour, ) @@ -561,7 +561,7 @@ def test_flow_on(self, basic_flow_system_linopy_coords, coords_config): model.add_variables(binary=True, coords=model.get_coords()), ) # Upper bound is total hours when active_hours_max is not specified - total_hours = model.hours_per_step.sum('time') + total_hours = model.timestep_duration.sum('time') assert_var_equal( model.variables['Sink(Wärme)|active_hours'], model.add_variables(lower=0, upper=total_hours, coords=model.get_coords(['period', 'scenario'])), @@ -580,7 +580,7 @@ def test_flow_on(self, basic_flow_system_linopy_coords, coords_config): assert_conequal( model.constraints['Sink(Wärme)|active_hours'], flow.submodel.variables['Sink(Wärme)|active_hours'] - == (flow.submodel.variables['Sink(Wärme)|status'] * model.hours_per_step).sum('time'), + == (flow.submodel.variables['Sink(Wärme)|status'] * model.timestep_duration).sum('time'), ) def test_effects_per_active_hour(self, basic_flow_system_linopy_coords, coords_config): @@ -635,13 +635,13 @@ def test_effects_per_active_hour(self, basic_flow_system_linopy_coords, coords_c assert_conequal( model.constraints['Sink(Wärme)->costs(temporal)'], model.variables['Sink(Wärme)->costs(temporal)'] - == flow.submodel.variables['Sink(Wärme)|status'] * model.hours_per_step * costs_per_running_hour, + == flow.submodel.variables['Sink(Wärme)|status'] * model.timestep_duration * costs_per_running_hour, ) assert_conequal( model.constraints['Sink(Wärme)->CO2(temporal)'], model.variables['Sink(Wärme)->CO2(temporal)'] - == flow.submodel.variables['Sink(Wärme)|status'] * model.hours_per_step * co2_per_running_hour, + == flow.submodel.variables['Sink(Wärme)|status'] * model.timestep_duration * co2_per_running_hour, ) def test_consecutive_on_hours(self, basic_flow_system_linopy_coords, coords_config): @@ -687,7 +687,7 @@ def test_consecutive_on_hours(self, basic_flow_system_linopy_coords, coords_conf model.add_variables(lower=0, upper=8, coords=model.get_coords()), ) - mega = model.hours_per_step.sum('time') + mega = model.timestep_duration.sum('time') assert_conequal( model.constraints['Sink(Wärme)|uptime|ub'], @@ -698,7 +698,7 @@ def test_consecutive_on_hours(self, basic_flow_system_linopy_coords, coords_conf model.constraints['Sink(Wärme)|uptime|forward'], 
model.variables['Sink(Wärme)|uptime'].isel(time=slice(1, None)) <= model.variables['Sink(Wärme)|uptime'].isel(time=slice(None, -1)) - + model.hours_per_step.isel(time=slice(None, -1)), + + model.timestep_duration.isel(time=slice(None, -1)), ) # eq: duration(t) >= duration(t - 1) + dt(t) + (On(t) - 1) * BIG @@ -706,14 +706,14 @@ def test_consecutive_on_hours(self, basic_flow_system_linopy_coords, coords_conf model.constraints['Sink(Wärme)|uptime|backward'], model.variables['Sink(Wärme)|uptime'].isel(time=slice(1, None)) >= model.variables['Sink(Wärme)|uptime'].isel(time=slice(None, -1)) - + model.hours_per_step.isel(time=slice(None, -1)) + + model.timestep_duration.isel(time=slice(None, -1)) + (model.variables['Sink(Wärme)|status'].isel(time=slice(1, None)) - 1) * mega, ) assert_conequal( model.constraints['Sink(Wärme)|uptime|initial'], model.variables['Sink(Wärme)|uptime'].isel(time=0) - == model.variables['Sink(Wärme)|status'].isel(time=0) * model.hours_per_step.isel(time=0), + == model.variables['Sink(Wärme)|status'].isel(time=0) * model.timestep_duration.isel(time=0), ) assert_conequal( @@ -768,7 +768,7 @@ def test_consecutive_on_hours_previous(self, basic_flow_system_linopy_coords, co model.add_variables(lower=0, upper=8, coords=model.get_coords()), ) - mega = model.hours_per_step.sum('time') + model.hours_per_step.isel(time=0) * 3 + mega = model.timestep_duration.sum('time') + model.timestep_duration.isel(time=0) * 3 assert_conequal( model.constraints['Sink(Wärme)|uptime|ub'], @@ -779,7 +779,7 @@ def test_consecutive_on_hours_previous(self, basic_flow_system_linopy_coords, co model.constraints['Sink(Wärme)|uptime|forward'], model.variables['Sink(Wärme)|uptime'].isel(time=slice(1, None)) <= model.variables['Sink(Wärme)|uptime'].isel(time=slice(None, -1)) - + model.hours_per_step.isel(time=slice(None, -1)), + + model.timestep_duration.isel(time=slice(None, -1)), ) # eq: duration(t) >= duration(t - 1) + dt(t) + (On(t) - 1) * BIG @@ -787,14 +787,14 @@ def test_consecutive_on_hours_previous(self, basic_flow_system_linopy_coords, co model.constraints['Sink(Wärme)|uptime|backward'], model.variables['Sink(Wärme)|uptime'].isel(time=slice(1, None)) >= model.variables['Sink(Wärme)|uptime'].isel(time=slice(None, -1)) - + model.hours_per_step.isel(time=slice(None, -1)) + + model.timestep_duration.isel(time=slice(None, -1)) + (model.variables['Sink(Wärme)|status'].isel(time=slice(1, None)) - 1) * mega, ) assert_conequal( model.constraints['Sink(Wärme)|uptime|initial'], model.variables['Sink(Wärme)|uptime'].isel(time=0) - == model.variables['Sink(Wärme)|status'].isel(time=0) * (model.hours_per_step.isel(time=0) * (1 + 3)), + == model.variables['Sink(Wärme)|status'].isel(time=0) * (model.timestep_duration.isel(time=0) * (1 + 3)), ) assert_conequal( @@ -850,7 +850,9 @@ def test_consecutive_off_hours(self, basic_flow_system_linopy_coords, coords_con model.add_variables(lower=0, upper=12, coords=model.get_coords()), ) - mega = model.hours_per_step.sum('time') + model.hours_per_step.isel(time=0) * 1 # previously inactive for 1h + mega = ( + model.timestep_duration.sum('time') + model.timestep_duration.isel(time=0) * 1 + ) # previously inactive for 1h assert_conequal( model.constraints['Sink(Wärme)|downtime|ub'], @@ -861,7 +863,7 @@ def test_consecutive_off_hours(self, basic_flow_system_linopy_coords, coords_con model.constraints['Sink(Wärme)|downtime|forward'], model.variables['Sink(Wärme)|downtime'].isel(time=slice(1, None)) <= model.variables['Sink(Wärme)|downtime'].isel(time=slice(None, -1)) - + 
model.hours_per_step.isel(time=slice(None, -1)), + + model.timestep_duration.isel(time=slice(None, -1)), ) # eq: duration(t) >= duration(t - 1) + dt(t) + (On(t) - 1) * BIG @@ -869,14 +871,14 @@ def test_consecutive_off_hours(self, basic_flow_system_linopy_coords, coords_con model.constraints['Sink(Wärme)|downtime|backward'], model.variables['Sink(Wärme)|downtime'].isel(time=slice(1, None)) >= model.variables['Sink(Wärme)|downtime'].isel(time=slice(None, -1)) - + model.hours_per_step.isel(time=slice(None, -1)) + + model.timestep_duration.isel(time=slice(None, -1)) + (model.variables['Sink(Wärme)|inactive'].isel(time=slice(1, None)) - 1) * mega, ) assert_conequal( model.constraints['Sink(Wärme)|downtime|initial'], model.variables['Sink(Wärme)|downtime'].isel(time=0) - == model.variables['Sink(Wärme)|inactive'].isel(time=0) * (model.hours_per_step.isel(time=0) * (1 + 1)), + == model.variables['Sink(Wärme)|inactive'].isel(time=0) * (model.timestep_duration.isel(time=0) * (1 + 1)), ) assert_conequal( @@ -933,7 +935,7 @@ def test_consecutive_off_hours_previous(self, basic_flow_system_linopy_coords, c model.add_variables(lower=0, upper=12, coords=model.get_coords()), ) - mega = model.hours_per_step.sum('time') + model.hours_per_step.isel(time=0) * 2 + mega = model.timestep_duration.sum('time') + model.timestep_duration.isel(time=0) * 2 assert_conequal( model.constraints['Sink(Wärme)|downtime|ub'], @@ -944,7 +946,7 @@ def test_consecutive_off_hours_previous(self, basic_flow_system_linopy_coords, c model.constraints['Sink(Wärme)|downtime|forward'], model.variables['Sink(Wärme)|downtime'].isel(time=slice(1, None)) <= model.variables['Sink(Wärme)|downtime'].isel(time=slice(None, -1)) - + model.hours_per_step.isel(time=slice(None, -1)), + + model.timestep_duration.isel(time=slice(None, -1)), ) # eq: duration(t) >= duration(t - 1) + dt(t) + (On(t) - 1) * BIG @@ -952,14 +954,14 @@ def test_consecutive_off_hours_previous(self, basic_flow_system_linopy_coords, c model.constraints['Sink(Wärme)|downtime|backward'], model.variables['Sink(Wärme)|downtime'].isel(time=slice(1, None)) >= model.variables['Sink(Wärme)|downtime'].isel(time=slice(None, -1)) - + model.hours_per_step.isel(time=slice(None, -1)) + + model.timestep_duration.isel(time=slice(None, -1)) + (model.variables['Sink(Wärme)|inactive'].isel(time=slice(1, None)) - 1) * mega, ) assert_conequal( model.constraints['Sink(Wärme)|downtime|initial'], model.variables['Sink(Wärme)|downtime'].isel(time=0) - == model.variables['Sink(Wärme)|inactive'].isel(time=0) * (model.hours_per_step.isel(time=0) * (1 + 2)), + == model.variables['Sink(Wärme)|inactive'].isel(time=0) * (model.timestep_duration.isel(time=0) * (1 + 2)), ) assert_conequal( @@ -1067,7 +1069,7 @@ def test_on_hours_limits(self, basic_flow_system_linopy_coords, coords_config): assert_conequal( model.constraints['Sink(Wärme)|active_hours'], flow.submodel.variables['Sink(Wärme)|active_hours'] - == (flow.submodel.variables['Sink(Wärme)|status'] * model.hours_per_step).sum('time'), + == (flow.submodel.variables['Sink(Wärme)|status'] * model.timestep_duration).sum('time'), ) @@ -1131,7 +1133,7 @@ def test_flow_on_invest_optional(self, basic_flow_system_linopy_coords, coords_c model.add_variables(binary=True, coords=model.get_coords()), ) # Upper bound is total hours when active_hours_max is not specified - total_hours = model.hours_per_step.sum('time') + total_hours = model.timestep_duration.sum('time') assert_var_equal( model.variables['Sink(Wärme)|active_hours'], model.add_variables(lower=0, 
upper=total_hours, coords=model.get_coords(['period', 'scenario'])), @@ -1157,7 +1159,7 @@ def test_flow_on_invest_optional(self, basic_flow_system_linopy_coords, coords_c assert_conequal( model.constraints['Sink(Wärme)|active_hours'], flow.submodel.variables['Sink(Wärme)|active_hours'] - == (flow.submodel.variables['Sink(Wärme)|status'] * model.hours_per_step).sum('time'), + == (flow.submodel.variables['Sink(Wärme)|status'] * model.timestep_duration).sum('time'), ) # Investment @@ -1233,7 +1235,7 @@ def test_flow_on_invest_non_optional(self, basic_flow_system_linopy_coords, coor model.add_variables(binary=True, coords=model.get_coords()), ) # Upper bound is total hours when active_hours_max is not specified - total_hours = model.hours_per_step.sum('time') + total_hours = model.timestep_duration.sum('time') assert_var_equal( model.variables['Sink(Wärme)|active_hours'], model.add_variables(lower=0, upper=total_hours, coords=model.get_coords(['period', 'scenario'])), @@ -1251,7 +1253,7 @@ def test_flow_on_invest_non_optional(self, basic_flow_system_linopy_coords, coor assert_conequal( model.constraints['Sink(Wärme)|active_hours'], flow.submodel.variables['Sink(Wärme)|active_hours'] - == (flow.submodel.variables['Sink(Wärme)|status'] * model.hours_per_step).sum('time'), + == (flow.submodel.variables['Sink(Wärme)|status'] * model.timestep_duration).sum('time'), ) # Investment diff --git a/tests/deprecated/test_flow_system_resample.py b/tests/deprecated/test_flow_system_resample.py index c76946f80..549f05208 100644 --- a/tests/deprecated/test_flow_system_resample.py +++ b/tests/deprecated/test_flow_system_resample.py @@ -128,7 +128,7 @@ def test_time_metadata_updated(simple_fs): """Test time metadata correctly updated.""" fs_r = simple_fs.resample('3h', method='mean') assert len(fs_r.timesteps) == 8 - assert_allclose(fs_r.hours_per_timestep.values, 3.0) + assert_allclose(fs_r.timestep_duration.values, 3.0) assert fs_r.hours_of_last_timestep == 3.0 diff --git a/tests/deprecated/test_integration.py b/tests/deprecated/test_integration.py index 2f083b4fb..9b05a5c10 100644 --- a/tests/deprecated/test_integration.py +++ b/tests/deprecated/test_integration.py @@ -282,12 +282,12 @@ def modeling_calculation(self, request, flow_system_long, highs_solver): 'aggModel', flow_system, fx.ClusteringParameters( - hours_per_period=6, - nr_of_periods=4, - fix_storage_flows=False, - aggregate_data_and_fix_non_binary_vars=True, - percentage_of_period_freedom=0, - penalty_of_period_freedom=0, + n_clusters=4, + cluster_duration='6h', + include_storage=False, + aggregate_data=True, + flexibility_percent=0, + flexibility_penalty=0, time_series_for_low_peaks=[electrical_load_ts, thermal_load_ts], time_series_for_high_peaks=[thermal_load_ts], ), diff --git a/tests/deprecated/test_linear_converter.py b/tests/deprecated/test_linear_converter.py index 57b911d64..d20d104d0 100644 --- a/tests/deprecated/test_linear_converter.py +++ b/tests/deprecated/test_linear_converter.py @@ -174,7 +174,7 @@ def test_linear_converter_with_status(self, basic_flow_system_linopy_coords, coo assert_conequal( model.constraints['Converter|active_hours'], model.variables['Converter|active_hours'] - == (model.variables['Converter|status'] * model.hours_per_step).sum('time'), + == (model.variables['Converter|status'] * model.timestep_duration).sum('time'), ) # Check conversion constraint @@ -188,7 +188,7 @@ def test_linear_converter_with_status(self, basic_flow_system_linopy_coords, coo assert_conequal( 
model.constraints['Converter->costs(temporal)'], model.variables['Converter->costs(temporal)'] - == model.variables['Converter|status'] * model.hours_per_step * 5, + == model.variables['Converter|status'] * model.timestep_duration * 5, ) def test_linear_converter_multidimensional(self, basic_flow_system_linopy_coords, coords_config): @@ -485,7 +485,7 @@ def test_piecewise_conversion_with_status(self, basic_flow_system_linopy_coords, assert 'Converter|active_hours' in model.constraints assert_conequal( model.constraints['Converter|active_hours'], - model['Converter|active_hours'] == (model['Converter|status'] * model.hours_per_step).sum('time'), + model['Converter|active_hours'] == (model['Converter|status'] * model.timestep_duration).sum('time'), ) # Verify that the costs effect is applied @@ -493,7 +493,7 @@ def test_piecewise_conversion_with_status(self, basic_flow_system_linopy_coords, assert_conequal( model.constraints['Converter->costs(temporal)'], model.variables['Converter->costs(temporal)'] - == model.variables['Converter|status'] * model.hours_per_step * 5, + == model.variables['Converter|status'] * model.timestep_duration * 5, ) diff --git a/tests/deprecated/test_on_hours_computation.py b/tests/deprecated/test_on_hours_computation.py index 578fd7792..c74332565 100644 --- a/tests/deprecated/test_on_hours_computation.py +++ b/tests/deprecated/test_on_hours_computation.py @@ -9,7 +9,7 @@ class TestComputeConsecutiveDuration: """Tests for the compute_consecutive_hours_in_state static method.""" @pytest.mark.parametrize( - 'binary_values, hours_per_timestep, expected', + 'binary_values, timestep_duration, expected', [ # Case 1: Single timestep DataArrays (xr.DataArray([1], dims=['time']), 5, 5), @@ -26,22 +26,22 @@ class TestComputeConsecutiveDuration: (xr.DataArray([0, 1, 1, 1, 0, 0], dims=['time']), 1, 0), # ends with 0 ], ) - def test_compute_duration(self, binary_values, hours_per_timestep, expected): + def test_compute_duration(self, binary_values, timestep_duration, expected): """Test compute_consecutive_hours_in_state with various inputs.""" - result = ModelingUtilities.compute_consecutive_hours_in_state(binary_values, hours_per_timestep) + result = ModelingUtilities.compute_consecutive_hours_in_state(binary_values, timestep_duration) assert np.isclose(result, expected) @pytest.mark.parametrize( - 'binary_values, hours_per_timestep', + 'binary_values, timestep_duration', [ - # Case: hours_per_timestep must be scalar + # Case: timestep_duration must be scalar (xr.DataArray([1, 1, 1, 1, 1], dims=['time']), np.array([1, 2])), ], ) - def test_compute_duration_raises_error(self, binary_values, hours_per_timestep): + def test_compute_duration_raises_error(self, binary_values, timestep_duration): """Test error conditions.""" with pytest.raises(TypeError): - ModelingUtilities.compute_consecutive_hours_in_state(binary_values, hours_per_timestep) + ModelingUtilities.compute_consecutive_hours_in_state(binary_values, timestep_duration) class TestComputePreviousOnStates: diff --git a/tests/deprecated/test_storage.py b/tests/deprecated/test_storage.py index 15170a321..3fd47fbf8 100644 --- a/tests/deprecated/test_storage.py +++ b/tests/deprecated/test_storage.py @@ -73,8 +73,8 @@ def test_basic_storage(self, basic_flow_system_linopy_coords, coords_config): model.constraints['TestStorage|charge_state'], charge_state.isel(time=slice(1, None)) == charge_state.isel(time=slice(None, -1)) - + model.variables['TestStorage(Q_th_in)|flow_rate'] * model.hours_per_step - - 
model.variables['TestStorage(Q_th_out)|flow_rate'] * model.hours_per_step, + + model.variables['TestStorage(Q_th_in)|flow_rate'] * model.timestep_duration + - model.variables['TestStorage(Q_th_out)|flow_rate'] * model.timestep_duration, ) # Check initial charge state constraint assert_conequal( @@ -146,7 +146,7 @@ def test_lossy_storage(self, basic_flow_system_linopy_coords, coords_config): charge_state = model.variables['TestStorage|charge_state'] rel_loss = 0.05 - hours_per_step = model.hours_per_step + timestep_duration = model.timestep_duration charge_rate = model.variables['TestStorage(Q_th_in)|flow_rate'] discharge_rate = model.variables['TestStorage(Q_th_out)|flow_rate'] eff_charge = 0.9 @@ -155,9 +155,9 @@ def test_lossy_storage(self, basic_flow_system_linopy_coords, coords_config): assert_conequal( model.constraints['TestStorage|charge_state'], charge_state.isel(time=slice(1, None)) - == charge_state.isel(time=slice(None, -1)) * (1 - rel_loss) ** hours_per_step - + charge_rate * eff_charge * hours_per_step - - discharge_rate / eff_discharge * hours_per_step, + == charge_state.isel(time=slice(None, -1)) * (1 - rel_loss) ** timestep_duration + + charge_rate * eff_charge * timestep_duration + - discharge_rate / eff_discharge * timestep_duration, ) # Check initial charge state constraint @@ -242,8 +242,8 @@ def test_charge_state_bounds(self, basic_flow_system_linopy_coords, coords_confi model.constraints['TestStorage|charge_state'], charge_state.isel(time=slice(1, None)) == charge_state.isel(time=slice(None, -1)) - + model.variables['TestStorage(Q_th_in)|flow_rate'] * model.hours_per_step - - model.variables['TestStorage(Q_th_out)|flow_rate'] * model.hours_per_step, + + model.variables['TestStorage(Q_th_in)|flow_rate'] * model.timestep_duration + - model.variables['TestStorage(Q_th_out)|flow_rate'] * model.timestep_duration, ) # Check initial charge state constraint assert_conequal( diff --git a/tests/test_bus.py b/tests/test_bus.py index cc49a2073..9bb7ddbe3 100644 --- a/tests/test_bus.py +++ b/tests/test_bus.py @@ -74,8 +74,8 @@ def test_bus_penalty(self, basic_flow_system_linopy_coords, coords_config): assert_conequal( model.constraints['TestBus->Penalty(temporal)'], model.variables['TestBus->Penalty(temporal)'] - == model.variables['TestBus|virtual_supply'] * 1e5 * model.hours_per_step - + model.variables['TestBus|virtual_demand'] * 1e5 * model.hours_per_step, + == model.variables['TestBus|virtual_supply'] * 1e5 * model.timestep_duration + + model.variables['TestBus|virtual_demand'] * 1e5 * model.timestep_duration, ) def test_bus_with_coords(self, basic_flow_system_linopy_coords, coords_config): diff --git a/tests/test_cluster_reduce_expand.py b/tests/test_cluster_reduce_expand.py new file mode 100644 index 000000000..9b8095422 --- /dev/null +++ b/tests/test_cluster_reduce_expand.py @@ -0,0 +1,347 @@ +"""Tests for cluster() and expand_solution() functionality.""" + +import numpy as np +import pandas as pd +import pytest +from numpy.testing import assert_allclose + +import flixopt as fx + + +def create_simple_system(timesteps: pd.DatetimeIndex) -> fx.FlowSystem: + """Create a simple FlowSystem for testing clustering.""" + # Create varying demand - different for each day to test clustering + hours = len(timesteps) + demand = np.sin(np.linspace(0, 4 * np.pi, hours)) * 10 + 15 # Oscillating demand + + flow_system = fx.FlowSystem(timesteps) + flow_system.add_elements( + fx.Bus('Heat'), + fx.Bus('Gas'), + fx.Effect('costs', '€', is_standard=True, is_objective=True), + 
fx.Sink('HeatDemand', inputs=[fx.Flow('Q', bus='Heat', fixed_relative_profile=demand, size=1)]), + fx.Source('GasSource', outputs=[fx.Flow('Gas', bus='Gas', effects_per_flow_hour=0.05)]), + fx.linear_converters.Boiler( + 'Boiler', + thermal_efficiency=0.9, + fuel_flow=fx.Flow('Q_fu', bus='Gas'), + thermal_flow=fx.Flow('Q_th', bus='Heat'), + ), + ) + return flow_system + + +@pytest.fixture +def timesteps_2_days(): + """48 hour timesteps (2 days).""" + return pd.date_range('2020-01-01', periods=48, freq='h') + + +@pytest.fixture +def timesteps_8_days(): + """192 hour timesteps (8 days) - more realistic for clustering.""" + return pd.date_range('2020-01-01', periods=192, freq='h') + + +def test_cluster_creates_reduced_timesteps(timesteps_8_days): + """Test that cluster creates a FlowSystem with fewer timesteps.""" + fs = create_simple_system(timesteps_8_days) + + # Reduce to 2 typical clusters (days) + fs_reduced = fs.transform.cluster( + n_clusters=2, + cluster_duration='1D', + ) + + # Should have 2 * 24 = 48 timesteps instead of 192 + assert len(fs_reduced.timesteps) == 48 + assert hasattr(fs_reduced, 'clustering') + assert fs_reduced.clustering.result.cluster_structure.n_clusters == 2 + + +def test_expand_solution_restores_full_timesteps(solver_fixture, timesteps_8_days): + """Test that expand_solution restores full timestep count.""" + fs = create_simple_system(timesteps_8_days) + + # Reduce to 2 typical clusters + fs_reduced = fs.transform.cluster( + n_clusters=2, + cluster_duration='1D', + ) + + # Optimize + fs_reduced.optimize(solver_fixture) + assert fs_reduced.solution is not None + assert len(fs_reduced.timesteps) == 48 + + # Expand back to full + fs_expanded = fs_reduced.transform.expand_solution() + + # Should have original timestep count + assert len(fs_expanded.timesteps) == 192 + assert fs_expanded.solution is not None + + +def test_expand_solution_preserves_solution_variables(solver_fixture, timesteps_8_days): + """Test that expand_solution keeps all solution variables.""" + fs = create_simple_system(timesteps_8_days) + + fs_reduced = fs.transform.cluster( + n_clusters=2, + cluster_duration='1D', + ) + fs_reduced.optimize(solver_fixture) + + reduced_vars = set(fs_reduced.solution.data_vars) + + fs_expanded = fs_reduced.transform.expand_solution() + expanded_vars = set(fs_expanded.solution.data_vars) + + # Should have all the same variables + assert reduced_vars == expanded_vars + + +def test_expand_solution_maps_values_correctly(solver_fixture, timesteps_8_days): + """Test that expand_solution correctly maps typical cluster values to all segments.""" + fs = create_simple_system(timesteps_8_days) + + fs_reduced = fs.transform.cluster( + n_clusters=2, + cluster_duration='1D', + ) + fs_reduced.optimize(solver_fixture) + + # Get cluster_order to know mapping + info = fs_reduced.clustering + cluster_order = info.result.cluster_structure.cluster_order.values + timesteps_per_cluster = info.result.cluster_structure.timesteps_per_cluster # 24 + + reduced_flow = fs_reduced.solution['Boiler(Q_th)|flow_rate'].values + + fs_expanded = fs_reduced.transform.expand_solution() + expanded_flow = fs_expanded.solution['Boiler(Q_th)|flow_rate'].values + + # Check that values are correctly mapped + # For each original segment, values should match the corresponding typical cluster + for orig_segment_idx, cluster_id in enumerate(cluster_order): + orig_start = orig_segment_idx * timesteps_per_cluster + orig_end = orig_start + timesteps_per_cluster + + typical_start = cluster_id * timesteps_per_cluster 
+ typical_end = typical_start + timesteps_per_cluster + + # Values in the expanded solution for this original segment + # should match the reduced solution for the corresponding typical cluster + expected = reduced_flow[typical_start:typical_end] + actual = expanded_flow[orig_start:orig_end] + + assert_allclose(actual, expected, rtol=1e-10) + + +def test_expand_solution_enables_statistics_accessor(solver_fixture, timesteps_8_days): + """Test that statistics accessor works on expanded FlowSystem.""" + fs = create_simple_system(timesteps_8_days) + + fs_reduced = fs.transform.cluster( + n_clusters=2, + cluster_duration='1D', + ) + fs_reduced.optimize(solver_fixture) + + fs_expanded = fs_reduced.transform.expand_solution() + + # These should work without errors + flow_rates = fs_expanded.statistics.flow_rates + assert 'Boiler(Q_th)' in flow_rates + assert len(flow_rates['Boiler(Q_th)'].coords['time']) == 192 + + flow_hours = fs_expanded.statistics.flow_hours + assert 'Boiler(Q_th)' in flow_hours + + +def test_expand_solution_statistics_match_clustered(solver_fixture, timesteps_8_days): + """Test that total_effects match between clustered and expanded FlowSystem.""" + fs = create_simple_system(timesteps_8_days) + + fs_reduced = fs.transform.cluster( + n_clusters=2, + cluster_duration='1D', + ) + fs_reduced.optimize(solver_fixture) + + fs_expanded = fs_reduced.transform.expand_solution() + + # Total effects should match between clustered and expanded + reduced_total = fs_reduced.statistics.total_effects['costs'].sum('contributor').item() + expanded_total = fs_expanded.statistics.total_effects['costs'].sum('contributor').item() + + assert_allclose(reduced_total, expanded_total, rtol=1e-6) + + # Flow hours should also match (need to sum over time with proper weighting) + reduced_flow_hours = ( + (fs_reduced.statistics.flow_hours['Boiler(Q_th)'] * fs_reduced.cluster_weight).sum('time').item() + ) + expanded_flow_hours = ( + (fs_expanded.statistics.flow_hours['Boiler(Q_th)'] * fs_expanded.cluster_weight).sum('time').item() + ) + + assert_allclose(reduced_flow_hours, expanded_flow_hours, rtol=1e-6) + + +def test_expand_solution_withoutclustering_raises(solver_fixture, timesteps_2_days): + """Test that expand_solution raises error if not a reduced FlowSystem.""" + fs = create_simple_system(timesteps_2_days) + fs.optimize(solver_fixture) + + with pytest.raises(ValueError, match='cluster'): + fs.transform.expand_solution() + + +def test_expand_solution_without_solution_raises(timesteps_8_days): + """Test that expand_solution raises error if no solution.""" + fs = create_simple_system(timesteps_8_days) + + fs_reduced = fs.transform.cluster( + n_clusters=2, + cluster_duration='1D', + ) + # Don't optimize - no solution + + with pytest.raises(ValueError, match='no solution'): + fs_reduced.transform.expand_solution() + + +# ==================== Multi-dimensional Tests ==================== + + +def create_system_with_scenarios(timesteps: pd.DatetimeIndex, scenarios: pd.Index) -> fx.FlowSystem: + """Create a FlowSystem with scenarios for testing.""" + hours = len(timesteps) + + # Create different demand profiles per scenario + demands = {} + for i, scenario in enumerate(scenarios): + # Different pattern per scenario + base_demand = np.sin(np.linspace(0, 4 * np.pi, hours)) * 10 + 15 + demands[scenario] = base_demand * (1 + 0.2 * i) # Scale differently per scenario + + # Create DataFrame with scenarios as columns + demand_df = pd.DataFrame(demands, index=timesteps) + + flow_system = fx.FlowSystem(timesteps, 
scenarios=scenarios) + flow_system.add_elements( + fx.Bus('Heat'), + fx.Bus('Gas'), + fx.Effect('costs', '€', is_standard=True, is_objective=True), + fx.Sink( + 'HeatDemand', + inputs=[fx.Flow('Q', bus='Heat', fixed_relative_profile=demand_df, size=1)], + ), + fx.Source('GasSource', outputs=[fx.Flow('Gas', bus='Gas', effects_per_flow_hour=0.05)]), + fx.linear_converters.Boiler( + 'Boiler', + thermal_efficiency=0.9, + fuel_flow=fx.Flow('Q_fu', bus='Gas'), + thermal_flow=fx.Flow('Q_th', bus='Heat'), + ), + ) + return flow_system + + +@pytest.fixture +def scenarios_2(): + """Two scenarios for testing.""" + return pd.Index(['base', 'high'], name='scenario') + + +def test_cluster_with_scenarios(timesteps_8_days, scenarios_2): + """Test that cluster handles scenarios correctly.""" + fs = create_system_with_scenarios(timesteps_8_days, scenarios_2) + + # Verify scenarios are set up correctly + assert fs.scenarios is not None + assert len(fs.scenarios) == 2 + + # Reduce to 2 typical clusters + fs_reduced = fs.transform.cluster( + n_clusters=2, + cluster_duration='1D', + ) + + # Should have 2 * 24 = 48 timesteps + assert len(fs_reduced.timesteps) == 48 + + # Should have aggregation info with cluster structure + info = fs_reduced.clustering + assert info is not None + assert info.result.cluster_structure is not None + assert info.result.cluster_structure.n_clusters == 2 + # Original FlowSystem had scenarios + assert info.original_flow_system.scenarios is not None + + +def test_cluster_and_expand_with_scenarios(solver_fixture, timesteps_8_days, scenarios_2): + """Test full cluster -> optimize -> expand_solution cycle with scenarios.""" + fs = create_system_with_scenarios(timesteps_8_days, scenarios_2) + + # Reduce + fs_reduced = fs.transform.cluster( + n_clusters=2, + cluster_duration='1D', + ) + + # Optimize + fs_reduced.optimize(solver_fixture) + assert fs_reduced.solution is not None + + # Expand + fs_expanded = fs_reduced.transform.expand_solution() + + # Should have original timesteps + assert len(fs_expanded.timesteps) == 192 + + # Solution should have scenario dimension + flow_var = 'Boiler(Q_th)|flow_rate' + assert flow_var in fs_expanded.solution + assert 'scenario' in fs_expanded.solution[flow_var].dims + assert len(fs_expanded.solution[flow_var].coords['time']) == 192 + + +def test_expand_solution_maps_scenarios_independently(solver_fixture, timesteps_8_days, scenarios_2): + """Test that expand_solution correctly maps scenarios in multi-scenario systems.""" + fs = create_system_with_scenarios(timesteps_8_days, scenarios_2) + + fs_reduced = fs.transform.cluster( + n_clusters=2, + cluster_duration='1D', + ) + fs_reduced.optimize(solver_fixture) + + info = fs_reduced.clustering + cluster_structure = info.result.cluster_structure + timesteps_per_cluster = cluster_structure.timesteps_per_cluster # 24 + + reduced_flow = fs_reduced.solution['Boiler(Q_th)|flow_rate'] + fs_expanded = fs_reduced.transform.expand_solution() + expanded_flow = fs_expanded.solution['Boiler(Q_th)|flow_rate'] + + # Check mapping for each scenario using its own cluster_order + for scenario in scenarios_2: + # Get the cluster_order for THIS scenario + cluster_order = cluster_structure.get_cluster_order_for_slice(scenario=scenario) + + reduced_scenario = reduced_flow.sel(scenario=scenario).values + expanded_scenario = expanded_flow.sel(scenario=scenario).values + + # Verify mapping is correct for this scenario using its own cluster_order + for orig_segment_idx, cluster_id in enumerate(cluster_order): + orig_start = 
orig_segment_idx * timesteps_per_cluster + orig_end = orig_start + timesteps_per_cluster + + typical_start = cluster_id * timesteps_per_cluster + typical_end = typical_start + timesteps_per_cluster + + expected = reduced_scenario[typical_start:typical_end] + actual = expanded_scenario[orig_start:orig_end] + + assert_allclose(actual, expected, rtol=1e-10, err_msg=f'Mismatch for scenario {scenario}') diff --git a/tests/test_clustering/__init__.py b/tests/test_clustering/__init__.py new file mode 100644 index 000000000..4a026052c --- /dev/null +++ b/tests/test_clustering/__init__.py @@ -0,0 +1 @@ +"""Tests for the flixopt.aggregation module.""" diff --git a/tests/test_clustering/test_base.py b/tests/test_clustering/test_base.py new file mode 100644 index 000000000..a6c4d8cc7 --- /dev/null +++ b/tests/test_clustering/test_base.py @@ -0,0 +1,159 @@ +"""Tests for flixopt.clustering.base module.""" + +import numpy as np +import pytest +import xarray as xr + +from flixopt.clustering import ( + Clustering, + ClusterResult, + ClusterStructure, + create_cluster_structure_from_mapping, +) + + +class TestClusterStructure: + """Tests for ClusterStructure dataclass.""" + + def test_basic_creation(self): + """Test basic ClusterStructure creation.""" + cluster_order = xr.DataArray([0, 1, 0, 1, 2, 0], dims=['original_period']) + cluster_occurrences = xr.DataArray([3, 2, 1], dims=['cluster']) + + structure = ClusterStructure( + cluster_order=cluster_order, + cluster_occurrences=cluster_occurrences, + n_clusters=3, + timesteps_per_cluster=24, + ) + + assert structure.n_clusters == 3 + assert structure.timesteps_per_cluster == 24 + assert structure.n_original_periods == 6 + + def test_creation_from_numpy(self): + """Test ClusterStructure creation from numpy arrays.""" + structure = ClusterStructure( + cluster_order=np.array([0, 0, 1, 1, 0]), + cluster_occurrences=np.array([3, 2]), + n_clusters=2, + timesteps_per_cluster=12, + ) + + assert isinstance(structure.cluster_order, xr.DataArray) + assert isinstance(structure.cluster_occurrences, xr.DataArray) + assert structure.n_original_periods == 5 + + def test_get_cluster_weight_per_timestep(self): + """Test weight calculation per timestep.""" + structure = ClusterStructure( + cluster_order=xr.DataArray([0, 1, 0], dims=['original_period']), + cluster_occurrences=xr.DataArray([2, 1], dims=['cluster']), + n_clusters=2, + timesteps_per_cluster=4, + ) + + weights = structure.get_cluster_weight_per_timestep() + + # Cluster 0 has 4 timesteps, each with weight 2 + # Cluster 1 has 4 timesteps, each with weight 1 + assert len(weights) == 8 + assert float(weights.isel(time=0).values) == 2.0 + assert float(weights.isel(time=4).values) == 1.0 + + +class TestClusterResult: + """Tests for ClusterResult dataclass.""" + + def test_basic_creation(self): + """Test basic ClusterResult creation.""" + result = ClusterResult( + timestep_mapping=xr.DataArray([0, 0, 1, 1, 2, 2], dims=['original_time']), + n_representatives=3, + representative_weights=xr.DataArray([2, 2, 2], dims=['time']), + ) + + assert result.n_representatives == 3 + assert result.n_original_timesteps == 6 + + def test_creation_from_numpy(self): + """Test ClusterResult creation from numpy arrays.""" + result = ClusterResult( + timestep_mapping=np.array([0, 1, 0, 1]), + n_representatives=2, + representative_weights=np.array([2.0, 2.0]), + ) + + assert isinstance(result.timestep_mapping, xr.DataArray) + assert isinstance(result.representative_weights, xr.DataArray) + + def test_validation_success(self): + """Test 
validation passes for valid result.""" + result = ClusterResult( + timestep_mapping=xr.DataArray([0, 1, 0, 1], dims=['original_time']), + n_representatives=2, + representative_weights=xr.DataArray([2.0, 2.0], dims=['time']), + ) + + # Should not raise + result.validate() + + def test_validation_invalid_mapping(self): + """Test validation fails for out-of-range mapping.""" + result = ClusterResult( + timestep_mapping=xr.DataArray([0, 5, 0, 1], dims=['original_time']), # 5 is out of range + n_representatives=2, + representative_weights=xr.DataArray([2.0, 2.0], dims=['time']), + ) + + with pytest.raises(ValueError, match='timestep_mapping contains index'): + result.validate() + + def test_get_expansion_mapping(self): + """Test get_expansion_mapping returns named DataArray.""" + result = ClusterResult( + timestep_mapping=xr.DataArray([0, 1, 0], dims=['original_time']), + n_representatives=2, + representative_weights=xr.DataArray([2.0, 1.0], dims=['time']), + ) + + mapping = result.get_expansion_mapping() + assert mapping.name == 'expansion_mapping' + + +class TestCreateClusterStructureFromMapping: + """Tests for create_cluster_structure_from_mapping function.""" + + def test_basic_creation(self): + """Test creating ClusterStructure from timestep mapping.""" + # 12 original timesteps, 4 per period, 3 periods + # Mapping: period 0 -> cluster 0, period 1 -> cluster 1, period 2 -> cluster 0 + mapping = xr.DataArray( + [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3], # First and third period map to cluster 0 + dims=['original_time'], + ) + + structure = create_cluster_structure_from_mapping(mapping, timesteps_per_cluster=4) + + assert structure.timesteps_per_cluster == 4 + assert structure.n_original_periods == 3 + + +class TestClustering: + """Tests for Clustering dataclass.""" + + def test_creation(self): + """Test Clustering creation.""" + result = ClusterResult( + timestep_mapping=xr.DataArray([0, 1], dims=['original_time']), + n_representatives=2, + representative_weights=xr.DataArray([1.0, 1.0], dims=['time']), + ) + + info = Clustering( + result=result, + original_flow_system=None, # Would be FlowSystem in practice + backend_name='tsam', + ) + + assert info.backend_name == 'tsam' diff --git a/tests/test_clustering/test_integration.py b/tests/test_clustering/test_integration.py new file mode 100644 index 000000000..e3c6083a0 --- /dev/null +++ b/tests/test_clustering/test_integration.py @@ -0,0 +1,148 @@ +"""Integration tests for flixopt.aggregation module with FlowSystem.""" + +import numpy as np +import pandas as pd +import pytest +import xarray as xr + +from flixopt import FlowSystem, TimeSeriesWeights + + +class TestTimeSeriesWeights: + """Tests for TimeSeriesWeights class.""" + + def test_creation(self): + """Test TimeSeriesWeights creation.""" + temporal = xr.DataArray([1.0, 1.0, 1.0], dims=['time']) + weights = TimeSeriesWeights(temporal=temporal) + + assert 'time' in weights.temporal.dims + assert float(weights.temporal.sum().values) == 3.0 + + def test_invalid_no_time_dim(self): + """Test error when temporal has no time dimension.""" + temporal = xr.DataArray([1.0, 1.0], dims=['other']) + + with pytest.raises(ValueError, match='time'): + TimeSeriesWeights(temporal=temporal) + + def test_sum_over_time(self): + """Test sum_over_time convenience method.""" + temporal = xr.DataArray([2.0, 3.0, 1.0], dims=['time'], coords={'time': [0, 1, 2]}) + weights = TimeSeriesWeights(temporal=temporal) + + data = xr.DataArray([10.0, 20.0, 30.0], dims=['time'], coords={'time': [0, 1, 2]}) + result = 
weights.sum_over_time(data) + + # 10*2 + 20*3 + 30*1 = 20 + 60 + 30 = 110 + assert float(result.values) == 110.0 + + def test_effective_objective(self): + """Test effective_objective property.""" + temporal = xr.DataArray([1.0, 1.0], dims=['time']) + objective = xr.DataArray([2.0, 2.0], dims=['time']) + + # Without override + weights1 = TimeSeriesWeights(temporal=temporal) + assert np.array_equal(weights1.effective_objective.values, temporal.values) + + # With override + weights2 = TimeSeriesWeights(temporal=temporal, objective=objective) + assert np.array_equal(weights2.effective_objective.values, objective.values) + + +class TestFlowSystemWeightsProperty: + """Tests for FlowSystem.weights property.""" + + def test_weights_property_exists(self): + """Test that FlowSystem has weights property.""" + fs = FlowSystem(timesteps=pd.date_range('2024-01-01', periods=24, freq='h')) + + weights = fs.weights + assert isinstance(weights, TimeSeriesWeights) + + def test_weights_temporal_equals_aggregation_weight(self): + """Test that weights.temporal equals aggregation_weight.""" + fs = FlowSystem(timesteps=pd.date_range('2024-01-01', periods=24, freq='h')) + + weights = fs.weights + aggregation_weight = fs.aggregation_weight + + np.testing.assert_array_almost_equal(weights.temporal.values, aggregation_weight.values) + + def test_weights_with_cluster_weight(self): + """Test weights property includes cluster_weight.""" + # Create FlowSystem with custom cluster_weight + timesteps = pd.date_range('2024-01-01', periods=24, freq='h') + cluster_weight = np.array([2.0] * 12 + [1.0] * 12) # First 12h weighted 2x + + fs = FlowSystem(timesteps=timesteps, cluster_weight=cluster_weight) + + weights = fs.weights + + # temporal = timestep_duration * cluster_weight + # timestep_duration is 1h for all, so temporal = cluster_weight + expected = 1.0 * cluster_weight + np.testing.assert_array_almost_equal(weights.temporal.values, expected) + + +class TestClusterMethod: + """Tests for FlowSystem.transform.cluster method.""" + + def test_cluster_method_exists(self): + """Test that transform.cluster method exists.""" + fs = FlowSystem(timesteps=pd.date_range('2024-01-01', periods=48, freq='h')) + + assert hasattr(fs.transform, 'cluster') + assert callable(fs.transform.cluster) + + def test_cluster_reduces_timesteps(self): + """Test that cluster reduces timesteps.""" + # This test requires tsam to be installed + pytest.importorskip('tsam') + from flixopt import Bus, Flow, Sink, Source + from flixopt.core import TimeSeriesData + + # Create FlowSystem with 7 days of data (168 hours) + n_hours = 168 # 7 days + fs = FlowSystem(timesteps=pd.date_range('2024-01-01', periods=n_hours, freq='h')) + + # Add some basic components with time series data + demand_data = np.sin(np.linspace(0, 14 * np.pi, n_hours)) + 2 # Varying demand over 7 days + bus = Bus('electricity') + # Bus label is passed as string to Flow + grid_flow = Flow('grid_in', bus='electricity', size=100) + demand_flow = Flow( + 'demand_out', bus='electricity', size=100, fixed_relative_profile=TimeSeriesData(demand_data / 100) + ) + source = Source('grid', outputs=[grid_flow]) + sink = Sink('demand', inputs=[demand_flow]) + fs.add_elements(source, sink, bus) + + # Reduce 7 days to 2 representative days + fs_clustered = fs.transform.cluster( + n_clusters=2, + cluster_duration='1D', + ) + + # Check that timesteps were reduced (from 168 hours to 48 hours = 2 days x 24 hours) + assert len(fs_clustered.timesteps) < len(fs.timesteps) + assert len(fs_clustered.timesteps) == 48 # 
2 representative days x 24 hours
+
+
+class TestClusteringModuleImports:
+    """Tests for flixopt.clustering module imports."""
+
+    def test_import_from_flixopt(self):
+        """Test that clustering module can be imported from flixopt."""
+        from flixopt import clustering
+
+        assert hasattr(clustering, 'ClusterResult')
+        assert hasattr(clustering, 'ClusterStructure')
+        assert hasattr(clustering, 'Clustering')
+
+    def test_create_cluster_structure_from_mapping_available(self):
+        """Test that create_cluster_structure_from_mapping is available."""
+        from flixopt.clustering import create_cluster_structure_from_mapping
+
+        assert callable(create_cluster_structure_from_mapping)
diff --git a/tests/test_effect.py b/tests/test_effect.py
index 015e054eb..60fbb0166 100644
--- a/tests/test_effect.py
+++ b/tests/test_effect.py
@@ -129,8 +129,8 @@ def test_bounds(self, basic_flow_system_linopy_coords, coords_config):
         assert_var_equal(
             model.variables['Effect1(temporal)|per_timestep'],
             model.add_variables(
-                lower=4.0 * model.hours_per_step,
-                upper=4.1 * model.hours_per_step,
+                lower=4.0 * model.timestep_duration,
+                upper=4.1 * model.timestep_duration,
                 coords=model.get_coords(['time', 'period', 'scenario']),
             ),
         )
diff --git a/tests/test_flow.py b/tests/test_flow.py
index 594bc1fbb..8e1ce1f53 100644
--- a/tests/test_flow.py
+++ b/tests/test_flow.py
@@ -23,7 +23,7 @@ def test_flow_minimal(self, basic_flow_system_linopy_coords, coords_config):
         assert_conequal(
             model.constraints['Sink(Wärme)|total_flow_hours'],
             flow.submodel.variables['Sink(Wärme)|total_flow_hours']
-            == (flow.submodel.variables['Sink(Wärme)|flow_rate'] * model.hours_per_step).sum('time'),
+            == (flow.submodel.variables['Sink(Wärme)|flow_rate'] * model.timestep_duration).sum('time'),
         )
         assert_var_equal(flow.submodel.flow_rate, model.add_variables(lower=0, upper=100, coords=model.get_coords()))
         assert_var_equal(
@@ -61,7 +61,7 @@ def test_flow(self, basic_flow_system_linopy_coords, coords_config):
         assert_conequal(
             model.constraints['Sink(Wärme)|total_flow_hours'],
             flow.submodel.variables['Sink(Wärme)|total_flow_hours']
-            == (flow.submodel.variables['Sink(Wärme)|flow_rate'] * model.hours_per_step).sum('time'),
+            == (flow.submodel.variables['Sink(Wärme)|flow_rate'] * model.timestep_duration).sum('time'),
         )
 
         assert_var_equal(
@@ -83,12 +83,12 @@ def test_flow(self, basic_flow_system_linopy_coords, coords_config):
 
         assert_conequal(
             model.constraints['Sink(Wärme)|load_factor_min'],
-            flow.submodel.variables['Sink(Wärme)|total_flow_hours'] >= model.hours_per_step.sum('time') * 0.1 * 100,
+            flow.submodel.variables['Sink(Wärme)|total_flow_hours'] >= model.timestep_duration.sum('time') * 0.1 * 100,
         )
 
         assert_conequal(
             model.constraints['Sink(Wärme)|load_factor_max'],
-            flow.submodel.variables['Sink(Wärme)|total_flow_hours'] <= model.hours_per_step.sum('time') * 0.9 * 100,
+            flow.submodel.variables['Sink(Wärme)|total_flow_hours'] <= model.timestep_duration.sum('time') * 0.9 * 100,
         )
 
         assert_sets_equal(
@@ -129,13 +129,13 @@ def test_effects_per_flow_hour(self, basic_flow_system_linopy_coords, coords_con
         assert_conequal(
             model.constraints['Sink(Wärme)->costs(temporal)'],
             model.variables['Sink(Wärme)->costs(temporal)']
-            == flow.submodel.variables['Sink(Wärme)|flow_rate'] * model.hours_per_step * costs_per_flow_hour,
+            == flow.submodel.variables['Sink(Wärme)|flow_rate'] * model.timestep_duration * costs_per_flow_hour,
         )
 
         assert_conequal(
             model.constraints['Sink(Wärme)->CO2(temporal)'],
             model.variables['Sink(Wärme)->CO2(temporal)']
-            == flow.submodel.variables['Sink(Wärme)|flow_rate'] * model.hours_per_step * co2_per_flow_hour,
+            == flow.submodel.variables['Sink(Wärme)|flow_rate'] * model.timestep_duration * co2_per_flow_hour,
         )
 
@@ -561,7 +561,7 @@ def test_flow_on(self, basic_flow_system_linopy_coords, coords_config):
             model.add_variables(binary=True, coords=model.get_coords()),
         )
         # Upper bound is total hours when active_hours_max is not specified
-        total_hours = model.hours_per_step.sum('time')
+        total_hours = model.timestep_duration.sum('time')
         assert_var_equal(
             model.variables['Sink(Wärme)|active_hours'],
             model.add_variables(lower=0, upper=total_hours, coords=model.get_coords(['period', 'scenario'])),
@@ -580,7 +580,7 @@ def test_flow_on(self, basic_flow_system_linopy_coords, coords_config):
         assert_conequal(
             model.constraints['Sink(Wärme)|active_hours'],
             flow.submodel.variables['Sink(Wärme)|active_hours']
-            == (flow.submodel.variables['Sink(Wärme)|status'] * model.hours_per_step).sum('time'),
+            == (flow.submodel.variables['Sink(Wärme)|status'] * model.timestep_duration).sum('time'),
         )
 
     def test_effects_per_active_hour(self, basic_flow_system_linopy_coords, coords_config):
@@ -635,13 +635,13 @@ def test_effects_per_active_hour(self, basic_flow_system_linopy_coords, coords_c
         assert_conequal(
             model.constraints['Sink(Wärme)->costs(temporal)'],
             model.variables['Sink(Wärme)->costs(temporal)']
-            == flow.submodel.variables['Sink(Wärme)|status'] * model.hours_per_step * costs_per_running_hour,
+            == flow.submodel.variables['Sink(Wärme)|status'] * model.timestep_duration * costs_per_running_hour,
         )
 
         assert_conequal(
             model.constraints['Sink(Wärme)->CO2(temporal)'],
             model.variables['Sink(Wärme)->CO2(temporal)']
-            == flow.submodel.variables['Sink(Wärme)|status'] * model.hours_per_step * co2_per_running_hour,
+            == flow.submodel.variables['Sink(Wärme)|status'] * model.timestep_duration * co2_per_running_hour,
         )
 
     def test_consecutive_on_hours(self, basic_flow_system_linopy_coords, coords_config):
@@ -687,7 +687,7 @@ def test_consecutive_on_hours(self, basic_flow_system_linopy_coords, coords_conf
             model.add_variables(lower=0, upper=8, coords=model.get_coords()),
         )
 
-        mega = model.hours_per_step.sum('time')
+        mega = model.timestep_duration.sum('time')
 
         assert_conequal(
             model.constraints['Sink(Wärme)|uptime|ub'],
@@ -698,7 +698,7 @@ def test_consecutive_on_hours(self, basic_flow_system_linopy_coords, coords_conf
             model.constraints['Sink(Wärme)|uptime|forward'],
             model.variables['Sink(Wärme)|uptime'].isel(time=slice(1, None))
             <= model.variables['Sink(Wärme)|uptime'].isel(time=slice(None, -1))
-            + model.hours_per_step.isel(time=slice(None, -1)),
+            + model.timestep_duration.isel(time=slice(None, -1)),
         )
 
         # eq: duration(t) >= duration(t - 1) + dt(t) + (On(t) - 1) * BIG
@@ -706,14 +706,14 @@ def test_consecutive_on_hours(self, basic_flow_system_linopy_coords, coords_conf
             model.constraints['Sink(Wärme)|uptime|backward'],
             model.variables['Sink(Wärme)|uptime'].isel(time=slice(1, None))
             >= model.variables['Sink(Wärme)|uptime'].isel(time=slice(None, -1))
-            + model.hours_per_step.isel(time=slice(None, -1))
+            + model.timestep_duration.isel(time=slice(None, -1))
             + (model.variables['Sink(Wärme)|status'].isel(time=slice(1, None)) - 1) * mega,
         )
 
         assert_conequal(
             model.constraints['Sink(Wärme)|uptime|initial'],
             model.variables['Sink(Wärme)|uptime'].isel(time=0)
-            == model.variables['Sink(Wärme)|status'].isel(time=0) * model.hours_per_step.isel(time=0),
+            == model.variables['Sink(Wärme)|status'].isel(time=0) * model.timestep_duration.isel(time=0),
         )
 
         assert_conequal(
@@ -768,7 +768,7 @@ def test_consecutive_on_hours_previous(self, basic_flow_system_linopy_coords, co
             model.add_variables(lower=0, upper=8, coords=model.get_coords()),
         )
 
-        mega = model.hours_per_step.sum('time') + model.hours_per_step.isel(time=0) * 3
+        mega = model.timestep_duration.sum('time') + model.timestep_duration.isel(time=0) * 3
 
         assert_conequal(
             model.constraints['Sink(Wärme)|uptime|ub'],
@@ -779,7 +779,7 @@ def test_consecutive_on_hours_previous(self, basic_flow_system_linopy_coords, co
             model.constraints['Sink(Wärme)|uptime|forward'],
             model.variables['Sink(Wärme)|uptime'].isel(time=slice(1, None))
             <= model.variables['Sink(Wärme)|uptime'].isel(time=slice(None, -1))
-            + model.hours_per_step.isel(time=slice(None, -1)),
+            + model.timestep_duration.isel(time=slice(None, -1)),
         )
 
         # eq: duration(t) >= duration(t - 1) + dt(t) + (On(t) - 1) * BIG
@@ -787,14 +787,14 @@ def test_consecutive_on_hours_previous(self, basic_flow_system_linopy_coords, co
             model.constraints['Sink(Wärme)|uptime|backward'],
             model.variables['Sink(Wärme)|uptime'].isel(time=slice(1, None))
             >= model.variables['Sink(Wärme)|uptime'].isel(time=slice(None, -1))
-            + model.hours_per_step.isel(time=slice(None, -1))
+            + model.timestep_duration.isel(time=slice(None, -1))
             + (model.variables['Sink(Wärme)|status'].isel(time=slice(1, None)) - 1) * mega,
         )
 
         assert_conequal(
             model.constraints['Sink(Wärme)|uptime|initial'],
             model.variables['Sink(Wärme)|uptime'].isel(time=0)
-            == model.variables['Sink(Wärme)|status'].isel(time=0) * (model.hours_per_step.isel(time=0) * (1 + 3)),
+            == model.variables['Sink(Wärme)|status'].isel(time=0) * (model.timestep_duration.isel(time=0) * (1 + 3)),
         )
 
         assert_conequal(
@@ -850,7 +850,9 @@ def test_consecutive_off_hours(self, basic_flow_system_linopy_coords, coords_con
             model.add_variables(lower=0, upper=12, coords=model.get_coords()),
         )
 
-        mega = model.hours_per_step.sum('time') + model.hours_per_step.isel(time=0) * 1  # previously inactive for 1h
+        mega = (
+            model.timestep_duration.sum('time') + model.timestep_duration.isel(time=0) * 1
+        )  # previously inactive for 1h
 
         assert_conequal(
             model.constraints['Sink(Wärme)|downtime|ub'],
@@ -861,7 +863,7 @@ def test_consecutive_off_hours(self, basic_flow_system_linopy_coords, coords_con
             model.constraints['Sink(Wärme)|downtime|forward'],
             model.variables['Sink(Wärme)|downtime'].isel(time=slice(1, None))
             <= model.variables['Sink(Wärme)|downtime'].isel(time=slice(None, -1))
-            + model.hours_per_step.isel(time=slice(None, -1)),
+            + model.timestep_duration.isel(time=slice(None, -1)),
         )
 
         # eq: duration(t) >= duration(t - 1) + dt(t) + (On(t) - 1) * BIG
@@ -869,14 +871,14 @@ def test_consecutive_off_hours(self, basic_flow_system_linopy_coords, coords_con
             model.constraints['Sink(Wärme)|downtime|backward'],
             model.variables['Sink(Wärme)|downtime'].isel(time=slice(1, None))
             >= model.variables['Sink(Wärme)|downtime'].isel(time=slice(None, -1))
-            + model.hours_per_step.isel(time=slice(None, -1))
+            + model.timestep_duration.isel(time=slice(None, -1))
             + (model.variables['Sink(Wärme)|inactive'].isel(time=slice(1, None)) - 1) * mega,
         )
 
         assert_conequal(
             model.constraints['Sink(Wärme)|downtime|initial'],
             model.variables['Sink(Wärme)|downtime'].isel(time=0)
-            == model.variables['Sink(Wärme)|inactive'].isel(time=0) * (model.hours_per_step.isel(time=0) * (1 + 1)),
+            == model.variables['Sink(Wärme)|inactive'].isel(time=0) * (model.timestep_duration.isel(time=0) * (1 + 1)),
         )
 
         assert_conequal(
@@ -933,7 +935,7 @@ def test_consecutive_off_hours_previous(self, basic_flow_system_linopy_coords, c
             model.add_variables(lower=0, upper=12, coords=model.get_coords()),
         )
 
-        mega = model.hours_per_step.sum('time') + model.hours_per_step.isel(time=0) * 2
+        mega = model.timestep_duration.sum('time') + model.timestep_duration.isel(time=0) * 2
 
         assert_conequal(
             model.constraints['Sink(Wärme)|downtime|ub'],
@@ -944,7 +946,7 @@ def test_consecutive_off_hours_previous(self, basic_flow_system_linopy_coords, c
             model.constraints['Sink(Wärme)|downtime|forward'],
             model.variables['Sink(Wärme)|downtime'].isel(time=slice(1, None))
             <= model.variables['Sink(Wärme)|downtime'].isel(time=slice(None, -1))
-            + model.hours_per_step.isel(time=slice(None, -1)),
+            + model.timestep_duration.isel(time=slice(None, -1)),
         )
 
         # eq: duration(t) >= duration(t - 1) + dt(t) + (On(t) - 1) * BIG
@@ -952,14 +954,14 @@ def test_consecutive_off_hours_previous(self, basic_flow_system_linopy_coords, c
             model.constraints['Sink(Wärme)|downtime|backward'],
             model.variables['Sink(Wärme)|downtime'].isel(time=slice(1, None))
             >= model.variables['Sink(Wärme)|downtime'].isel(time=slice(None, -1))
-            + model.hours_per_step.isel(time=slice(None, -1))
+            + model.timestep_duration.isel(time=slice(None, -1))
             + (model.variables['Sink(Wärme)|inactive'].isel(time=slice(1, None)) - 1) * mega,
         )
 
         assert_conequal(
             model.constraints['Sink(Wärme)|downtime|initial'],
             model.variables['Sink(Wärme)|downtime'].isel(time=0)
-            == model.variables['Sink(Wärme)|inactive'].isel(time=0) * (model.hours_per_step.isel(time=0) * (1 + 2)),
+            == model.variables['Sink(Wärme)|inactive'].isel(time=0) * (model.timestep_duration.isel(time=0) * (1 + 2)),
         )
 
         assert_conequal(
@@ -1067,7 +1069,7 @@ def test_on_hours_limits(self, basic_flow_system_linopy_coords, coords_config):
         assert_conequal(
             model.constraints['Sink(Wärme)|active_hours'],
             flow.submodel.variables['Sink(Wärme)|active_hours']
-            == (flow.submodel.variables['Sink(Wärme)|status'] * model.hours_per_step).sum('time'),
+            == (flow.submodel.variables['Sink(Wärme)|status'] * model.timestep_duration).sum('time'),
         )
 
@@ -1131,7 +1133,7 @@ def test_flow_on_invest_optional(self, basic_flow_system_linopy_coords, coords_c
             model.add_variables(binary=True, coords=model.get_coords()),
         )
         # Upper bound is total hours when active_hours_max is not specified
-        total_hours = model.hours_per_step.sum('time')
+        total_hours = model.timestep_duration.sum('time')
         assert_var_equal(
             model.variables['Sink(Wärme)|active_hours'],
             model.add_variables(lower=0, upper=total_hours, coords=model.get_coords(['period', 'scenario'])),
@@ -1157,7 +1159,7 @@ def test_flow_on_invest_optional(self, basic_flow_system_linopy_coords, coords_c
         assert_conequal(
             model.constraints['Sink(Wärme)|active_hours'],
             flow.submodel.variables['Sink(Wärme)|active_hours']
-            == (flow.submodel.variables['Sink(Wärme)|status'] * model.hours_per_step).sum('time'),
+            == (flow.submodel.variables['Sink(Wärme)|status'] * model.timestep_duration).sum('time'),
         )
 
         # Investment
@@ -1233,7 +1235,7 @@ def test_flow_on_invest_non_optional(self, basic_flow_system_linopy_coords, coor
             model.add_variables(binary=True, coords=model.get_coords()),
         )
         # Upper bound is total hours when active_hours_max is not specified
-        total_hours = model.hours_per_step.sum('time')
+        total_hours = model.timestep_duration.sum('time')
         assert_var_equal(
             model.variables['Sink(Wärme)|active_hours'],
             model.add_variables(lower=0, upper=total_hours, coords=model.get_coords(['period', 'scenario'])),
@@ -1251,7 +1253,7 @@ def test_flow_on_invest_non_optional(self, basic_flow_system_linopy_coords, coor
         assert_conequal(
             model.constraints['Sink(Wärme)|active_hours'],
             flow.submodel.variables['Sink(Wärme)|active_hours']
-            == (flow.submodel.variables['Sink(Wärme)|status'] * model.hours_per_step).sum('time'),
+            == (flow.submodel.variables['Sink(Wärme)|status'] * model.timestep_duration).sum('time'),
         )
 
         # Investment
diff --git a/tests/test_flow_system_resample.py b/tests/test_flow_system_resample.py
index 7486b173c..dd5e19176 100644
--- a/tests/test_flow_system_resample.py
+++ b/tests/test_flow_system_resample.py
@@ -128,7 +128,7 @@ def test_time_metadata_updated(simple_fs):
     """Test time metadata correctly updated."""
     fs_r = simple_fs.resample('3h', method='mean')
     assert len(fs_r.timesteps) == 8
-    assert_allclose(fs_r.hours_per_timestep.values, 3.0)
+    assert_allclose(fs_r.timestep_duration.values, 3.0)
     assert fs_r.hours_of_last_timestep == 3.0
 
diff --git a/tests/test_linear_converter.py b/tests/test_linear_converter.py
index 57b911d64..d20d104d0 100644
--- a/tests/test_linear_converter.py
+++ b/tests/test_linear_converter.py
@@ -174,7 +174,7 @@ def test_linear_converter_with_status(self, basic_flow_system_linopy_coords, coo
         assert_conequal(
             model.constraints['Converter|active_hours'],
             model.variables['Converter|active_hours']
-            == (model.variables['Converter|status'] * model.hours_per_step).sum('time'),
+            == (model.variables['Converter|status'] * model.timestep_duration).sum('time'),
         )
 
         # Check conversion constraint
@@ -188,7 +188,7 @@ def test_linear_converter_with_status(self, basic_flow_system_linopy_coords, coo
         assert_conequal(
             model.constraints['Converter->costs(temporal)'],
             model.variables['Converter->costs(temporal)']
-            == model.variables['Converter|status'] * model.hours_per_step * 5,
+            == model.variables['Converter|status'] * model.timestep_duration * 5,
         )
 
     def test_linear_converter_multidimensional(self, basic_flow_system_linopy_coords, coords_config):
@@ -485,7 +485,7 @@ def test_piecewise_conversion_with_status(self, basic_flow_system_linopy_coords,
         assert 'Converter|active_hours' in model.constraints
         assert_conequal(
             model.constraints['Converter|active_hours'],
-            model['Converter|active_hours'] == (model['Converter|status'] * model.hours_per_step).sum('time'),
+            model['Converter|active_hours'] == (model['Converter|status'] * model.timestep_duration).sum('time'),
         )
 
         # Verify that the costs effect is applied
@@ -493,7 +493,7 @@ def test_piecewise_conversion_with_status(self, basic_flow_system_linopy_coords,
         assert_conequal(
             model.constraints['Converter->costs(temporal)'],
             model.variables['Converter->costs(temporal)']
-            == model.variables['Converter|status'] * model.hours_per_step * 5,
+            == model.variables['Converter|status'] * model.timestep_duration * 5,
         )
 
diff --git a/tests/test_on_hours_computation.py b/tests/test_on_hours_computation.py
index 578fd7792..c74332565 100644
--- a/tests/test_on_hours_computation.py
+++ b/tests/test_on_hours_computation.py
@@ -9,7 +9,7 @@ class TestComputeConsecutiveDuration:
     """Tests for the compute_consecutive_hours_in_state static method."""
 
     @pytest.mark.parametrize(
-        'binary_values, hours_per_timestep, expected',
+        'binary_values, timestep_duration, expected',
         [
             # Case 1: Single timestep DataArrays
             (xr.DataArray([1], dims=['time']), 5, 5),
@@ -26,22 +26,22 @@ class TestComputeConsecutiveDuration:
             (xr.DataArray([0, 1, 1, 1, 0, 0], dims=['time']), 1, 0),  # ends with 0
         ],
     )
-    def test_compute_duration(self, binary_values, hours_per_timestep, expected):
+    def test_compute_duration(self, binary_values, timestep_duration, expected):
         """Test compute_consecutive_hours_in_state with various inputs."""
-        result = ModelingUtilities.compute_consecutive_hours_in_state(binary_values, hours_per_timestep)
+        result = ModelingUtilities.compute_consecutive_hours_in_state(binary_values, timestep_duration)
         assert np.isclose(result, expected)
 
     @pytest.mark.parametrize(
-        'binary_values, hours_per_timestep',
+        'binary_values, timestep_duration',
         [
-            # Case: hours_per_timestep must be scalar
+            # Case: timestep_duration must be scalar
             (xr.DataArray([1, 1, 1, 1, 1], dims=['time']), np.array([1, 2])),
         ],
     )
-    def test_compute_duration_raises_error(self, binary_values, hours_per_timestep):
+    def test_compute_duration_raises_error(self, binary_values, timestep_duration):
         """Test error conditions."""
         with pytest.raises(TypeError):
-            ModelingUtilities.compute_consecutive_hours_in_state(binary_values, hours_per_timestep)
+            ModelingUtilities.compute_consecutive_hours_in_state(binary_values, timestep_duration)
 
 
 class TestComputePreviousOnStates:
diff --git a/tests/test_storage.py b/tests/test_storage.py
index 15170a321..3fd47fbf8 100644
--- a/tests/test_storage.py
+++ b/tests/test_storage.py
@@ -73,8 +73,8 @@ def test_basic_storage(self, basic_flow_system_linopy_coords, coords_config):
             model.constraints['TestStorage|charge_state'],
             charge_state.isel(time=slice(1, None))
             == charge_state.isel(time=slice(None, -1))
-            + model.variables['TestStorage(Q_th_in)|flow_rate'] * model.hours_per_step
-            - model.variables['TestStorage(Q_th_out)|flow_rate'] * model.hours_per_step,
+            + model.variables['TestStorage(Q_th_in)|flow_rate'] * model.timestep_duration
+            - model.variables['TestStorage(Q_th_out)|flow_rate'] * model.timestep_duration,
         )
         # Check initial charge state constraint
         assert_conequal(
@@ -146,7 +146,7 @@ def test_lossy_storage(self, basic_flow_system_linopy_coords, coords_config):
         charge_state = model.variables['TestStorage|charge_state']
 
         rel_loss = 0.05
-        hours_per_step = model.hours_per_step
+        timestep_duration = model.timestep_duration
         charge_rate = model.variables['TestStorage(Q_th_in)|flow_rate']
         discharge_rate = model.variables['TestStorage(Q_th_out)|flow_rate']
         eff_charge = 0.9
@@ -155,9 +155,9 @@ def test_lossy_storage(self, basic_flow_system_linopy_coords, coords_config):
         assert_conequal(
             model.constraints['TestStorage|charge_state'],
             charge_state.isel(time=slice(1, None))
-            == charge_state.isel(time=slice(None, -1)) * (1 - rel_loss) ** hours_per_step
-            + charge_rate * eff_charge * hours_per_step
-            - discharge_rate / eff_discharge * hours_per_step,
+            == charge_state.isel(time=slice(None, -1)) * (1 - rel_loss) ** timestep_duration
+            + charge_rate * eff_charge * timestep_duration
+            - discharge_rate / eff_discharge * timestep_duration,
         )
 
         # Check initial charge state constraint
@@ -242,8 +242,8 @@ def test_charge_state_bounds(self, basic_flow_system_linopy_coords, coords_confi
             model.constraints['TestStorage|charge_state'],
             charge_state.isel(time=slice(1, None))
             == charge_state.isel(time=slice(None, -1))
-            + model.variables['TestStorage(Q_th_in)|flow_rate'] * model.hours_per_step
-            - model.variables['TestStorage(Q_th_out)|flow_rate'] * model.hours_per_step,
+            + model.variables['TestStorage(Q_th_in)|flow_rate'] * model.timestep_duration
+            - model.variables['TestStorage(Q_th_out)|flow_rate'] * model.timestep_duration,
         )
         # Check initial charge state constraint
         assert_conequal(