import pandas as pd from datetime import timedelta, datetime from zoneinfo import ZoneInfo # Step 1: Create the test data in UTC and convert to Europe/Amsterdam timezone def create_test_data(): # Create the dataset in UTC with continuous 15-minute intervals df_test_utc = pd.DataFrame({ 'datetime': pd.date_range('2023-03-25', '2023-10-30', freq='15min', tz='UTC'), 'cumulative_col1': range(10000, 10000 + len(pd.date_range('2023-03-25', '2023-10-30', freq='15min', tz='UTC'))), 'cumulative_col2': range(20000, 20000 + len(pd.date_range('2023-03-25', '2023-10-30', freq='15min', tz='UTC'))) }) # Convert the UTC datetime column to Europe/Amsterdam timezone df_test_dutch = df_test_utc.copy() df_test_dutch['datetime'] = df_test_dutch['datetime'].dt.tz_convert('Europe/Amsterdam') # Return the prepared dataframe return df_test_dutch # Function to get exact DST transitions for a given year def get_exact_dst_transitions(year, timezone='Europe/Amsterdam'): tz = ZoneInfo(timezone) start_of_year = datetime(year, 1, 1, tzinfo=tz) end_of_year = datetime(year + 1, 1, 1, tzinfo=tz) current_time = start_of_year transitions = [] # Check hour by hour for the exact transition while current_time < end_of_year: next_time = current_time + timedelta(hours=1) if current_time.utcoffset() != next_time.utcoffset(): transition_type = "Start of DST" if next_time.utcoffset() > current_time.utcoffset() else "End of DST" transitions.append({ 'transition': transition_type, 'exact_time': next_time.astimezone(tz), 'previous_offset': current_time.utcoffset(), 'new_offset': next_time.utcoffset() }) current_time = next_time return transitions # Step 2: Function to correct summer time, log timezone changes, and handle DST dynamically def correct_summer_time_with_timezone_check(df, datetime_col, timezone='Europe/Amsterdam'): # Check if the datetime column is timezone-aware if isinstance(df[datetime_col].dtype, pd.DatetimeTZDtype): detected_timezone = df[datetime_col].dt.tz.zone # Ensure the timezone matches the one passed to the function if detected_timezone != timezone: raise ValueError(f"Detected timezone '{detected_timezone}' does not match the expected timezone '{timezone}'") print(f"Timezone '{detected_timezone}' detected. Removing timezone information.") df[datetime_col] = df[datetime_col].dt.tz_localize(None) # Get the unique years in the dataset years = pd.to_datetime(df[datetime_col]).dt.year.unique() # Adjust for each year's DST transition period for year in years: transitions = get_exact_dst_transitions(year, timezone) start_of_dst = transitions[0]['exact_time'].replace(tzinfo=None) end_of_dst = transitions[1]['exact_time'].replace(tzinfo=None) # Subtract 1 hour during the DST period dst_mask = (df[datetime_col] >= start_of_dst) & (df[datetime_col] < end_of_dst) df.loc[dst_mask, datetime_col] = df.loc[dst_mask, datetime_col] - timedelta(hours=1) return df # Example of usage if __name__ == "__main__": # Create test data df_test = create_test_data() # Correct summer time and remove timezone information df_corrected = correct_summer_time_with_timezone_check(df_test, 'datetime') # Display the dataset for the DST transition periods dst_transition_corrected = df_corrected[ (df_corrected['datetime'] >= '2023-03-26 01:00:00') & (df_corrected['datetime'] <= '2023-03-26 04:00:00') ] print(dst_transition_corrected) dst_transition_winter = df_corrected[ (df_corrected['datetime'] >= '2023-10-29 01:00:00') & (df_corrected['datetime'] <= '2023-10-29 04:00:00') ] print(dst_transition_winter)