96 lines
3.9 KiB
Python
96 lines
3.9 KiB
Python
|
|
import pandas as pd
|
|
from datetime import timedelta, datetime
|
|
from zoneinfo import ZoneInfo
|
|
|
|
# Step 1: Create the test data in UTC and convert to Europe/Amsterdam timezone
|
|
def create_test_data():
|
|
# Create the dataset in UTC with continuous 15-minute intervals
|
|
df_test_utc = pd.DataFrame({
|
|
'datetime': pd.date_range('2023-03-25', '2023-10-30', freq='15min', tz='UTC'),
|
|
'cumulative_col1': range(10000, 10000 + len(pd.date_range('2023-03-25', '2023-10-30', freq='15min', tz='UTC'))),
|
|
'cumulative_col2': range(20000, 20000 + len(pd.date_range('2023-03-25', '2023-10-30', freq='15min', tz='UTC')))
|
|
})
|
|
|
|
# Convert the UTC datetime column to Europe/Amsterdam timezone
|
|
df_test_dutch = df_test_utc.copy()
|
|
df_test_dutch['datetime'] = df_test_dutch['datetime'].dt.tz_convert('Europe/Amsterdam')
|
|
|
|
# Return the prepared dataframe
|
|
return df_test_dutch
|
|
|
|
# Function to get exact DST transitions for a given year
|
|
def get_exact_dst_transitions(year, timezone='Europe/Amsterdam'):
|
|
tz = ZoneInfo(timezone)
|
|
start_of_year = datetime(year, 1, 1, tzinfo=tz)
|
|
end_of_year = datetime(year + 1, 1, 1, tzinfo=tz)
|
|
|
|
current_time = start_of_year
|
|
transitions = []
|
|
|
|
# Check hour by hour for the exact transition
|
|
while current_time < end_of_year:
|
|
next_time = current_time + timedelta(hours=1)
|
|
if current_time.utcoffset() != next_time.utcoffset():
|
|
transition_type = "Start of DST" if next_time.utcoffset() > current_time.utcoffset() else "End of DST"
|
|
transitions.append({
|
|
'transition': transition_type,
|
|
'exact_time': next_time.astimezone(tz),
|
|
'previous_offset': current_time.utcoffset(),
|
|
'new_offset': next_time.utcoffset()
|
|
})
|
|
|
|
current_time = next_time
|
|
|
|
return transitions
|
|
|
|
# Step 2: Function to correct summer time, log timezone changes, and handle DST dynamically
|
|
def correct_summer_time_with_timezone_check(df, datetime_col, timezone='Europe/Amsterdam'):
|
|
# Check if the datetime column is timezone-aware
|
|
|
|
if isinstance(df[datetime_col].dtype, pd.DatetimeTZDtype):
|
|
detected_timezone = df[datetime_col].dt.tz.zone
|
|
|
|
# Ensure the timezone matches the one passed to the function
|
|
if detected_timezone != timezone:
|
|
raise ValueError(f"Detected timezone '{detected_timezone}' does not match the expected timezone '{timezone}'")
|
|
|
|
print(f"Timezone '{detected_timezone}' detected. Removing timezone information.")
|
|
df[datetime_col] = df[datetime_col].dt.tz_localize(None)
|
|
|
|
# Get the unique years in the dataset
|
|
years = pd.to_datetime(df[datetime_col]).dt.year.unique()
|
|
|
|
# Adjust for each year's DST transition period
|
|
for year in years:
|
|
transitions = get_exact_dst_transitions(year, timezone)
|
|
start_of_dst = transitions[0]['exact_time'].replace(tzinfo=None)
|
|
end_of_dst = transitions[1]['exact_time'].replace(tzinfo=None)
|
|
|
|
# Subtract 1 hour during the DST period
|
|
dst_mask = (df[datetime_col] >= start_of_dst) & (df[datetime_col] < end_of_dst)
|
|
df.loc[dst_mask, datetime_col] = df.loc[dst_mask, datetime_col] - timedelta(hours=1)
|
|
|
|
return df
|
|
|
|
# Example of usage
|
|
if __name__ == "__main__":
|
|
# Create test data
|
|
df_test = create_test_data()
|
|
|
|
# Correct summer time and remove timezone information
|
|
df_corrected = correct_summer_time_with_timezone_check(df_test, 'datetime')
|
|
|
|
# Display the dataset for the DST transition periods
|
|
dst_transition_corrected = df_corrected[
|
|
(df_corrected['datetime'] >= '2023-03-26 01:00:00') &
|
|
(df_corrected['datetime'] <= '2023-03-26 04:00:00')
|
|
]
|
|
print(dst_transition_corrected)
|
|
|
|
dst_transition_winter = df_corrected[
|
|
(df_corrected['datetime'] >= '2023-10-29 01:00:00') &
|
|
(df_corrected['datetime'] <= '2023-10-29 04:00:00')
|
|
]
|
|
print(dst_transition_winter)
|