dstuncrusher/summer_time_correction.py

96 lines
3.9 KiB
Python

import pandas as pd
from datetime import timedelta, datetime
from zoneinfo import ZoneInfo
# Step 1: Create the test data in UTC and convert to Europe/Amsterdam timezone
def create_test_data():
# Create the dataset in UTC with continuous 15-minute intervals
df_test_utc = pd.DataFrame({
'datetime': pd.date_range('2023-03-25', '2023-10-30', freq='15min', tz='UTC'),
'cumulative_col1': range(10000, 10000 + len(pd.date_range('2023-03-25', '2023-10-30', freq='15min', tz='UTC'))),
'cumulative_col2': range(20000, 20000 + len(pd.date_range('2023-03-25', '2023-10-30', freq='15min', tz='UTC')))
})
# Convert the UTC datetime column to Europe/Amsterdam timezone
df_test_dutch = df_test_utc.copy()
df_test_dutch['datetime'] = df_test_dutch['datetime'].dt.tz_convert('Europe/Amsterdam')
# Return the prepared dataframe
return df_test_dutch
# Function to get exact DST transitions for a given year
def get_exact_dst_transitions(year, timezone='Europe/Amsterdam'):
tz = ZoneInfo(timezone)
start_of_year = datetime(year, 1, 1, tzinfo=tz)
end_of_year = datetime(year + 1, 1, 1, tzinfo=tz)
current_time = start_of_year
transitions = []
# Check hour by hour for the exact transition
while current_time < end_of_year:
next_time = current_time + timedelta(hours=1)
if current_time.utcoffset() != next_time.utcoffset():
transition_type = "Start of DST" if next_time.utcoffset() > current_time.utcoffset() else "End of DST"
transitions.append({
'transition': transition_type,
'exact_time': next_time.astimezone(tz),
'previous_offset': current_time.utcoffset(),
'new_offset': next_time.utcoffset()
})
current_time = next_time
return transitions
# Step 2: Function to correct summer time, log timezone changes, and handle DST dynamically
def correct_summer_time_with_timezone_check(df, datetime_col, timezone='Europe/Amsterdam'):
# Check if the datetime column is timezone-aware
if isinstance(df[datetime_col].dtype, pd.DatetimeTZDtype):
detected_timezone = df[datetime_col].dt.tz.zone
# Ensure the timezone matches the one passed to the function
if detected_timezone != timezone:
raise ValueError(f"Detected timezone '{detected_timezone}' does not match the expected timezone '{timezone}'")
print(f"Timezone '{detected_timezone}' detected. Removing timezone information.")
df[datetime_col] = df[datetime_col].dt.tz_localize(None)
# Get the unique years in the dataset
years = pd.to_datetime(df[datetime_col]).dt.year.unique()
# Adjust for each year's DST transition period
for year in years:
transitions = get_exact_dst_transitions(year, timezone)
start_of_dst = transitions[0]['exact_time'].replace(tzinfo=None)
end_of_dst = transitions[1]['exact_time'].replace(tzinfo=None)
# Subtract 1 hour during the DST period
dst_mask = (df[datetime_col] >= start_of_dst) & (df[datetime_col] < end_of_dst)
df.loc[dst_mask, datetime_col] = df.loc[dst_mask, datetime_col] - timedelta(hours=1)
return df
# Example of usage
if __name__ == "__main__":
# Create test data
df_test = create_test_data()
# Correct summer time and remove timezone information
df_corrected = correct_summer_time_with_timezone_check(df_test, 'datetime')
# Display the dataset for the DST transition periods
dst_transition_corrected = df_corrected[
(df_corrected['datetime'] >= '2023-03-26 01:00:00') &
(df_corrected['datetime'] <= '2023-03-26 04:00:00')
]
print(dst_transition_corrected)
dst_transition_winter = df_corrected[
(df_corrected['datetime'] >= '2023-10-29 01:00:00') &
(df_corrected['datetime'] <= '2023-10-29 04:00:00')
]
print(dst_transition_winter)