# -*- coding: utf-8 -*-
"""
Created on Fri Aug 16 12:39:57 2024
@author: DEASV
"""
import os
import re
import numpy as np
import pandas as pd
import xarray as xr
from calendar import monthrange
from opendap_download.multi_processing_download import DownloadManager
import math
import tkinter as tk
from tkinter import filedialog
from PIL import ImageTk, Image
'''
from docx import Document
from docx.shared import Inches, Pt
'''
checkbox = None  # set by update(); stays None until a checkbox is ticked

def update():
    global checkbox
    if var1.get() and var2.get():
        checkbox = 'both'
    elif var1.get():
        checkbox = 'download'
    elif var2.get():
        checkbox = 'calculate'
    else:
        checkbox = None
        print("No checkboxes are selected")
    return checkbox
def open_file():
global file_path
file_path = filedialog.askopenfilename(filetypes=[('Excel Files', '*.xlsx')])
if file_path:
root.destroy()
root = tk.Tk()
root.title("Corrosivity Estimation")
img = Image.open("Vestas.jpg")
img = img.resize((750, 300), Image.LANCZOS)  # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the equivalent filter
img = ImageTk.PhotoImage(img)
panel = tk.Label(root, image=img)
panel.pack(side="top", fill="both", expand="yes")
var1 = tk.IntVar()
var2 = tk.IntVar()
chk1 = tk.Checkbutton(root, text='Download Corrosion Data', variable=var1, command=update)
chk2 = tk.Checkbutton(root, text='Calculate Corrosion Class', variable=var2, command=update)
chk1.pack()
chk2.pack()
btn = tk.Button(root, text='Proceed', command=open_file)
btn.pack()
root.mainloop()
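# The workbook selected in the dialog is expected to contain the sheets read below:
# 'pd', 'input', 'locs', 'years', 'sd' and 'mass'.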
# Convert each sheet to a dataframe
df1 = pd.read_excel(file_path, sheet_name='pd')
df2 = pd.read_excel(file_path, sheet_name='input')
df3 = pd.read_excel(file_path, sheet_name='locs', usecols=['location', 'latitude', 'longitude'])
df4 = pd.read_excel(file_path, sheet_name='years')
df5 = pd.read_excel(file_path, sheet_name='sd')
df8 = pd.read_excel(file_path, sheet_name='mass')
df11 = pd.read_excel(file_path, sheet_name='locs', usecols=['location'])
df12 = pd.read_excel(file_path, sheet_name='locs', usecols=['lifetime'])
location = df11['location'].tolist()
lifetime = df12['lifetime'].tolist()
# Convert the column into a list
database_name = df1['database_name'].tolist()
database_id = df1['database_id'].tolist()
field_id = df1['field_id'].tolist()
field_name_pd = df1['field_name'].tolist()
database_name_sd = df5['database_name'].tolist()
database_id_sd = df5['database_id'].tolist()
field_id_sd = df5['field_id'].tolist()
field_name_sd = df5['field_name'].tolist()
database_name_mass = df8['database_name'].tolist()
database_id_mass = df8['database_id'].tolist()
field_id_mass = df8['field_id'].tolist()
field_name_mass = df8['field_name'].tolist()
result_dict = df2.set_index('Input')['Value'].to_dict()
out = df3.to_numpy().tolist()
out = [tuple(elt) for elt in out]
####### INPUTS - CHANGE THESE #########
username = result_dict['username'] # Username for MERRA download account
password = result_dict['password'] # Password for MERRA download account
conversion_function = lambda x: x # Unit conversion applied to each hourly value; identity (no conversion) here. For example, use lambda x: x - 273.15 to convert temperature from Kelvin to Celsius.
aggregator = result_dict['aggregator'] # Method by which data will be aggregated over days, weeks and years. Can be "sum", "mean", "min", or "max"
years = df4['years'].tolist() # List of years for which data will be downloaded
num_years=len(years)
locs = out # List of locations for which data will be downloaded. Each location is a three-tuple, consisting of name (string), latitude, and longitude floats
####### CONSTANTS - DO NOT CHANGE BELOW THIS LINE #######
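# MERRA-2 uses a regular 0.5 deg (latitude) x 0.625 deg (longitude) grid, i.e.
# 361 x 576 grid points; the arrays below enumerate the corresponding grid indices.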
lat_coords = np.arange(0, 361, dtype=int)
lon_coords = np.arange(0, 576, dtype=int)
database_url_pd = []
database_url_sd = []
database_url_mass = []
for i in range(len(database_name)):
    database_url_pd.append('https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/' + database_name[i] + '.5.12.4/')
for i in range(len(database_name_sd)):
    database_url_sd.append('https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/' + database_name_sd[i] + '.5.12.4/')
for i in range(len(database_name_mass)):
    database_url_mass.append('https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/' + database_name_mass[i] + '.5.12.4/')
NUMBER_OF_CONNECTIONS = 5
dir_path = os.path.dirname(os.path.realpath(__file__))
####### DOWNLOAD DATA #########
# Translate lat/lon into coordinates that MERRA-2 understands
def translate_lat_to_geos5_native(latitude):
return ((latitude + 90) / 0.5)
def translate_lon_to_geos5_native(longitude):
return ((longitude + 180) / 0.625)
def find_closest_coordinate(calc_coord, coord_array):
index = np.abs(coord_array-calc_coord).argmin()
return coord_array[index]
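# Illustrative check of the coordinate translation (sample values, not taken from the
# input workbook): 55.5 deg N maps to grid index (55.5 + 90) / 0.5 = 291 and
# 8.125 deg E to (8.125 + 180) / 0.625 = 301 on the MERRA-2 grid.
assert find_closest_coordinate(translate_lat_to_geos5_native(55.5), lat_coords) == 291
assert find_closest_coordinate(translate_lon_to_geos5_native(8.125), lon_coords) == 301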
def translate_year_to_file_number(year):
file_number = ''
if year >= 1980 and year < 1992:
file_number = '100'
elif year >= 1992 and year < 2001:
file_number = '200'
elif year >= 2001 and year < 2011:
file_number = '300'
elif year >= 2011:
file_number = '400'
else:
raise Exception('The specified year is out of range.')
return file_number
def generate_url_params(parameter, time_para, lat_para, lon_para):
parameter = map(lambda x: x + time_para, parameter)
parameter = map(lambda x: x + lat_para, parameter)
parameter = map(lambda x: x + lon_para, parameter)
return ','.join(parameter)
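# Example (illustrative field id, not necessarily one listed in the workbook):
# generate_url_params(['T2M'], '[0:1:23]', '[291:1:291]', '[301:1:301]')
# returns 'T2M[0:1:23][291:1:291][301:1:301]', i.e. all 24 hourly values at one grid cell.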
def generate_download_links(download_years, base_url, dataset_name, url_params):
urls = []
for y in download_years:
y_str = str(y)
file_num = translate_year_to_file_number(y)
for m in range(1, 13):
m_str = str(m).zfill(2)
_, nr_of_days = monthrange(y, m)
for d in range(1, nr_of_days + 1):
d_str = str(d).zfill(2)
file_name = 'MERRA2_{num}.{name}.{y}{m}{d}.nc4'.format(
num=file_num, name=dataset_name,
y=y_str, m=m_str, d=d_str)
query = '{base}{y}/{m}/{name}.nc4?{params}'.format(
base=base_url, y=y_str, m=m_str,
name=file_name, params=url_params)
urls.append(query)
return urls
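# A generated link has the form (dataset and field names here are illustrative):
#   https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2T1NXSLV.5.12.4/2020/01/
#       MERRA2_400.tavg1_2d_slv_Nx.20200101.nc4.nc4?T2M[0:1:23][291:1:291][301:1:301]
# The second '.nc4' comes from the query template and asks the GES DISC OPeNDAP server
# to return the subset as a netCDF-4 file.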
if (checkbox == 'both' or checkbox == 'download'):
print('DOWNLOADING DATA FROM MERRA')
for loc, lat, lon in locs:
for i in range(len(database_name)):
            print('Downloading ' + field_name_pd[i] + ' data for ' + loc)
lat_coord = translate_lat_to_geos5_native(lat)
lon_coord = translate_lon_to_geos5_native(lon)
lat_closest = find_closest_coordinate(lat_coord, lat_coords)
lon_closest = find_closest_coordinate(lon_coord, lon_coords)
requested_lat = '[{lat}:1:{lat}]'.format(lat=lat_closest)
requested_lon = '[{lon}:1:{lon}]'.format(lon=lon_closest)
            parameter = generate_url_params([field_id[i]], '[0:1:23]', requested_lat, requested_lon)
            generated_URL = generate_download_links(years, database_url_pd[i], database_id[i], parameter)
            folder_name = field_name_pd[i] + '\\' + loc
folder_path = os.path.join(dir_path, folder_name)
if not os.path.exists(folder_path):
download_manager = DownloadManager()
download_manager.set_username_and_password(username, password)
                download_manager.download_path = field_name_pd[i] + '/' + loc
download_manager.download_urls = generated_URL
download_manager.start_download(NUMBER_OF_CONNECTIONS)
for loc, lat, lon in locs:
for i in range(len(database_name_sd)):
            print('Downloading ' + field_name_sd[i] + ' data for ' + loc)
lat_coord = translate_lat_to_geos5_native(lat)
lon_coord = translate_lon_to_geos5_native(lon)
lat_closest = find_closest_coordinate(lat_coord, lat_coords)
lon_closest = find_closest_coordinate(lon_coord, lon_coords)
requested_lat = '[{lat}:1:{lat}]'.format(lat=lat_closest)
requested_lon = '[{lon}:1:{lon}]'.format(lon=lon_closest)
parameter = generate_url_params([field_id_sd[i]], '[0:1:23]', requested_lat, requested_lon)
generated_URL = generate_download_links(years, database_url_sd[i], database_id_sd[i], parameter)
folder_name = field_name_sd[i] + '\\' + loc
folder_path = os.path.join(dir_path, folder_name)
if not os.path.exists(folder_path):
download_manager = DownloadManager()
download_manager.set_username_and_password(username, password)
download_manager.download_path = field_name_sd[i] + '/' + loc
download_manager.download_urls = generated_URL
download_manager.start_download(NUMBER_OF_CONNECTIONS)
for loc, lat, lon in locs:
for i in range(len(database_name_mass)):
print('Downloading ' + field_name_mass[i] + ' data for ' + loc)
lat_coord = translate_lat_to_geos5_native(lat)
lon_coord = translate_lon_to_geos5_native(lon)
lat_closest = find_closest_coordinate(lat_coord, lat_coords)
lon_closest = find_closest_coordinate(lon_coord, lon_coords)
requested_lat = '[{lat}:1:{lat}]'.format(lat=lat_closest)
requested_lon = '[{lon}:1:{lon}]'.format(lon=lon_closest)
parameter = generate_url_params([field_id_mass[i]], '[0:1:23]', requested_lat, requested_lon)
generated_URL = generate_download_links(years, database_url_mass[i], database_id_mass[i], parameter)
folder_name = field_name_mass[i] + '\\' + loc
folder_path = os.path.join(dir_path, folder_name)
if not os.path.exists(folder_path):
download_manager = DownloadManager()
download_manager.set_username_and_password(username, password)
download_manager.download_path = field_name_mass[i] + '/' + loc
download_manager.download_urls = generated_URL
download_manager.start_download(NUMBER_OF_CONNECTIONS)
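# NOTE: the three download loops above are identical apart from the field/database
# lists they iterate over. A possible consolidation is sketched below; it is not
# called anywhere, and the parameter names simply mirror the lists already defined
# in this script.
def download_fields(field_names, field_ids, database_urls, database_ids):
    for loc, lat, lon in locs:
        for i in range(len(field_names)):
            print('Downloading ' + field_names[i] + ' data for ' + loc)
            lat_closest = find_closest_coordinate(translate_lat_to_geos5_native(lat), lat_coords)
            lon_closest = find_closest_coordinate(translate_lon_to_geos5_native(lon), lon_coords)
            params = generate_url_params([field_ids[i]], '[0:1:23]',
                                         '[{0}:1:{0}]'.format(lat_closest),
                                         '[{0}:1:{0}]'.format(lon_closest))
            urls = generate_download_links(years, database_urls[i], database_ids[i], params)
            if not os.path.exists(os.path.join(dir_path, field_names[i], loc)):
                manager = DownloadManager()
                manager.set_username_and_password(username, password)
                manager.download_path = field_names[i] + '/' + loc
                manager.download_urls = urls
                manager.start_download(NUMBER_OF_CONNECTIONS)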
def extract_date(data_set):
if 'HDF5_GLOBAL.Filename' in data_set.attrs:
f_name = data_set.attrs['HDF5_GLOBAL.Filename']
elif 'Filename' in data_set.attrs:
f_name = data_set.attrs['Filename']
else:
raise AttributeError('The attribute name has changed again!')
exp = r'(?<=\.)[^\.]*(?=\.nc4)'
res = re.search(exp, f_name).group(0)
y, m, d = res[0:4], res[4:6], res[6:8]
date_str = ('%s-%s-%s' % (y, m, d))
data_set = data_set.assign(date=date_str)
return data_set
def mean_plus_std(x):
return x.mean() + x.std()
def percentile_90(x):
return np.percentile(x, 90)
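# The two classification functions below map an estimated first-year corrosion rate
# (micrometres per year) onto the ISO 9223 corrosivity categories C1-CX, using the
# category limits for carbon steel and for zinc respectively.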
def rclass_calculation(rcorr_steel_method3):
if rcorr_steel_method3 <= 1.3:
rclass_method3 = 'C1'
elif (rcorr_steel_method3 > 1.3 and rcorr_steel_method3 <= 25):
rclass_method3 = 'C2'
elif (rcorr_steel_method3 > 25 and rcorr_steel_method3 <= 50):
rclass_method3 = 'C3'
elif (rcorr_steel_method3 > 50 and rcorr_steel_method3 <= 80):
rclass_method3 = 'C4'
elif (rcorr_steel_method3 > 80 and rcorr_steel_method3 <= 200):
rclass_method3 = 'C5'
elif (rcorr_steel_method3 > 200 and rcorr_steel_method3 <= 700):
rclass_method3 = 'CX'
else:
rclass_method3 = 'NA'
return rclass_method3
def rclass_calculation_zinc(rcorr_steel_method3):
if rcorr_steel_method3 <= 0.1:
rclass_method3 = 'C1'
elif (rcorr_steel_method3 > 0.1 and rcorr_steel_method3 <= 0.7):
rclass_method3 = 'C2'
elif (rcorr_steel_method3 > 0.7 and rcorr_steel_method3 <= 2.1):
rclass_method3 = 'C3'
elif (rcorr_steel_method3 > 2.1 and rcorr_steel_method3 <= 4.2):
rclass_method3 = 'C4'
elif (rcorr_steel_method3 > 4.2 and rcorr_steel_method3 <= 8.4):
rclass_method3 = 'C5'
elif (rcorr_steel_method3 > 8.4 and rcorr_steel_method3 <= 25):
rclass_method3 = 'CX'
else:
rclass_method3 = 'NA'
return rclass_method3
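# Long-term attack is estimated with the power law th_loss = r_corr * lifetime**b,
# in the style of ISO 9224; the time exponent b is passed in by the caller
# (0.523 for carbon steel and 0.813 for zinc further below).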
def thickness_loss(lifetime, b, r_corr):
th_loss = r_corr * (lifetime ** b)
return th_loss
def print_results(loc, temp, rh, lifetime, pd_list_mean, sd_list_mean, material, rclass_method3, rcorr_steel_method3, thickness_loss):
with open(dir_path + "\\result.txt", 'a') as f:
f.write("\nLocation: " + str(loc) + " Temperature: " + str(temp) + " Rh: " + str(rh) + " lifetime: " + str(lifetime) + " Pd: " + str(pd_list_mean) + " Sd: " + str(sd_list_mean) + " material: " + str(material) + " method: 3 rcorr: " + str(rcorr_steel_method3) + " rclass: " + str(rclass_method3) + " thickness loss: " + str(thickness_loss))
if (checkbox == 'both' or checkbox == 'calculate'):
print('CLEANING AND MERGING DATA')
pd_list_mean = []
sd_list_mean = []
rcorr_steel_method3_list = []
rclass_steel_method3_list = []
# Write mass
mass_list_percentile = []
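    # For every field the script derives three yearly statistics from the daily data:
    # method 1 = yearly aggregate using the configured aggregator, method 2 = yearly
    # mean plus one standard deviation (mean_plus_std), method 3 = yearly 90th
    # percentile (percentile_90). The worst (maximum) year of each statistic is used
    # further down.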
for loc, lat, lon in locs:
for i in range(len(database_name_mass)):
print('Cleaning and merging ' + field_name_mass[i] + ' data for ' + loc)
dfs = []
for file in os.listdir(field_name_mass[i] + '/' + loc):
if '.nc4' in file:
try:
with xr.open_mfdataset(field_name_mass[i] + '/' + loc + '/' + file, preprocess=extract_date) as df:
dfs.append(df.to_dataframe())
except:
print('Issue with file ' + file)
if dfs:
df_hourly = pd.concat(dfs)
df_hourly['time'] = df_hourly.index.get_level_values(level=2)
df_hourly.columns = [field_name_mass[i], 'date', 'time']
df_hourly[field_name_mass[i]] = df_hourly[field_name_mass[i]].apply(conversion_function)
df_hourly['date'] = pd.to_datetime(df_hourly['date'])
df_hourly.to_csv(field_name_mass[i] + '/' + loc + '_hourly.csv', header=[field_name_mass[i], 'date', 'time'], index=False)
df_hourly = pd.read_csv(field_name_mass[i] + '/' + loc + '_hourly.csv')
df_daily = df_hourly.groupby('date').agg(aggregator)
df_daily = df_daily.drop('time', axis=1)
df_daily['date'] = df_daily.index
df_daily.to_csv(field_name_mass[i] + '/' + loc + '_daily.csv', header=[field_name_mass[i], 'date'], index=False)
                df_weekly = df_daily.copy()  # copy so the Week/Year columns added below do not leak back into df_daily
df_weekly['Week'] = pd.to_datetime(df_weekly['date']).apply(lambda x: x.isocalendar()[1])
df_weekly['Year'] = pd.to_datetime(df_weekly['date']).apply(lambda x: x.year)
df_weekly = df_weekly.groupby(['Year', 'Week']).agg(aggregator)
df_weekly['Year'] = df_weekly.index.get_level_values(0)
df_weekly['Week'] = df_weekly.index.get_level_values(1)
df_weekly.to_csv(field_name_mass[i] + '/' + loc + '_weekly.csv', index=False)
                df_yearly = df_daily.copy()
                df_yearly_std = df_daily.copy()
                df_yearly_percentile = df_daily.copy()
df_yearly['Year'] = pd.to_datetime(df_yearly['date']).apply(lambda x: x.year)
df_yearly_std['Year'] = pd.to_datetime(df_yearly['date']).apply(lambda x: x.year)
df_yearly_percentile['Year'] = pd.to_datetime(df_yearly['date']).apply(lambda x: x.year)
df_yearly1 = df_yearly_std.groupby(['Year']).agg(mean_plus_std).reset_index() # Reset index to handle re-indexing
df_yearly2 = df_yearly_percentile.groupby(['Year']).agg(percentile_90).reset_index() # Reset index to handle re-indexing
df_yearly = df_yearly.groupby(['Year']).agg(aggregator).reset_index() # Reset index to handle re-indexing
df_yearly.to_csv(field_name_mass[i] + '/' + loc + '_yearly_mean.csv', index=False)
df_yearly1.to_csv(field_name_mass[i] + '/' + loc + '_yearly_mean_stddev.csv', index=False)
df_yearly2.to_csv(field_name_mass[i] + '/' + loc + '_yearly_percentile.csv', index=False)
else:
with open(dir_path + "\\result.txt", 'a') as f:
f.write("\nNo valid files for " + field_name_mass[i] + " at " + loc + ", assuming zero values.")
pd.DataFrame(columns=[field_name_mass[i], 'date']).to_csv(field_name_mass[i] + '/' + loc + '_hourly.csv', index=False)
pd.DataFrame(columns=[field_name_mass[i], 'date']).to_csv(field_name_mass[i] + '/' + loc + '_daily.csv', index=False)
pd.DataFrame(columns=[field_name_mass[i], 'Year']).to_csv(field_name_mass[i] + '/' + loc + '_yearly_mean.csv', index=False)
pd.DataFrame(columns=[field_name_mass[i], 'Year']).to_csv(field_name_mass[i] + '/' + loc + '_yearly_mean_stddev.csv', index=False)
pd.DataFrame(columns=[field_name_mass[i], 'Year']).to_csv(field_name_mass[i] + '/' + loc + '_yearly_percentile.csv', index=False)
for loc, lat, lon in locs:
mass_pd_mean = 0
mass_pd_std = 0
mass_pd_percentile = 0
mass_sd_mean = 0
mass_sd_std = 0
mass_sd_percentile = 0
for i in range(len(database_name_mass)):
df = pd.read_csv(field_name_mass[i] + '/' + loc + '_yearly_mean.csv')
df1 = pd.read_csv(field_name_mass[i] + '/' + loc + '_yearly_mean_stddev.csv')
df2 = pd.read_csv(field_name_mass[i] + '/' + loc + '_yearly_percentile.csv')
total_mean = df[field_name_mass[i]].max() if not df.empty else 0
total_mean_std = df1[field_name_mass[i]].max() if not df1.empty else 0
total_percentile = df2[field_name_mass[i]].max() if not df2.empty else 0
with open(dir_path + "\\result.txt", 'a') as f:
f.write("\nfield name(max of all years): " + str(field_name_mass[i]) + " location: " + str(loc) + " mass method 1: " + str(total_mean))
f.write("\nfield name(max of all years): " + str(field_name_mass[i]) + " location: " + str(loc) + " mass method 2: " + str(total_mean_std))
f.write("\nfield name(max of all years): " + str(field_name_mass[i]) + " location: " + str(loc) + " mass method 3: " + str(total_percentile))
if (field_id_mass[i] == 'SSSMASS'):
mass_sd_mean = mass_sd_mean + total_mean
mass_sd_std = mass_sd_std + total_mean_std
mass_sd_percentile = mass_sd_percentile + total_percentile
mass_list_percentile.append(mass_sd_percentile)
print(mass_list_percentile)
# Write pd
pd_list_mean = []
pd_list_percentile = []
for loc, lat, lon in locs:
for i in range(len(database_name)):
print('Cleaning and merging ' + field_name_pd[i] + ' data for ' + loc)
dfs = []
for file in os.listdir(field_name_pd[i] + '/' + loc):
if '.nc4' in file:
try:
with xr.open_mfdataset(field_name_pd[i] + '/' + loc + '/' + file, preprocess=extract_date) as df:
dfs.append(df.to_dataframe())
except:
print('Issue with file ' + file)
if dfs:
df_hourly = pd.concat(dfs)
df_hourly['time'] = df_hourly.index.get_level_values(level=2)
df_hourly.columns = [field_name_pd[i], 'date', 'time']
df_hourly[field_name_pd[i]] = df_hourly[field_name_pd[i]].apply(conversion_function)
df_hourly['date'] = pd.to_datetime(df_hourly['date'])
df_hourly.to_csv(field_name_pd[i] + '/' + loc + '_hourly.csv', header=[field_name_pd[i], 'date', 'time'], index=False)
df_hourly = pd.read_csv(field_name_pd[i] + '/' + loc + '_hourly.csv')
df_daily = df_hourly.groupby('date').agg(aggregator)
df_daily = df_daily.drop('time', axis=1)
df_daily['date'] = df_daily.index
df_daily.to_csv(field_name_pd[i] + '/' + loc + '_daily.csv', header=[field_name_pd[i], 'date'], index=False)
                df_weekly = df_daily.copy()
df_weekly['Week'] = pd.to_datetime(df_weekly['date']).apply(lambda x: x.isocalendar()[1])
df_weekly['Year'] = pd.to_datetime(df_weekly['date']).apply(lambda x: x.year)
df_weekly = df_weekly.groupby(['Year', 'Week']).agg(aggregator)
df_weekly['Year'] = df_weekly.index.get_level_values(0)
df_weekly['Week'] = df_weekly.index.get_level_values(1)
df_weekly.to_csv(field_name_pd[i] + '/' + loc + '_weekly.csv', index=False)
                df_yearly = df_daily.copy()
                df_yearly_std = df_daily.copy()
                df_yearly_percentile = df_daily.copy()
df_yearly['Year'] = pd.to_datetime(df_yearly['date']).apply(lambda x: x.year)
df_yearly_std['Year'] = pd.to_datetime(df_yearly['date']).apply(lambda x: x.year)
df_yearly_percentile['Year'] = pd.to_datetime(df_yearly['date']).apply(lambda x: x.year)
df_yearly1 = df_yearly_std.groupby(['Year']).agg(mean_plus_std).reset_index() # Reset index to handle re-indexing
df_yearly2 = df_yearly_percentile.groupby(['Year']).agg(percentile_90).reset_index() # Reset index to handle re-indexing
df_yearly = df_yearly.groupby(['Year']).agg(aggregator).reset_index() # Reset index to handle re-indexing
df_yearly.to_csv(field_name_pd[i] + '/' + loc + '_yearly_mean.csv', index=False)
df_yearly1.to_csv(field_name_pd[i] + '/' + loc + '_yearly_mean_stddev.csv', index=False)
df_yearly2.to_csv(field_name_pd[i] + '/' + loc + '_yearly_percentile.csv', index=False)
else:
with open(dir_path + "\\result.txt", 'a') as f:
f.write("\nNo valid files for " + field_name_pd[i] + " at " + loc + ", assuming zero values.")
pd.DataFrame(columns=[field_name_pd[i], 'date']).to_csv(field_name_pd[i] + '/' + loc + '_hourly.csv', index=False)
pd.DataFrame(columns=[field_name_pd[i], 'date']).to_csv(field_name_pd[i] + '/' + loc + '_daily.csv', index=False)
pd.DataFrame(columns=[field_name_pd[i], 'Year']).to_csv(field_name_pd[i] + '/' + loc + '_yearly_mean.csv', index=False)
pd.DataFrame(columns=[field_name_pd[i], 'Year']).to_csv(field_name_pd[i] + '/' + loc + '_yearly_mean_stddev.csv', index=False)
pd.DataFrame(columns=[field_name_pd[i], 'Year']).to_csv(field_name_pd[i] + '/' + loc + '_yearly_percentile.csv', index=False)
for loc, lat, lon in locs:
pd_total_mean = 0
pd_total_mean_std = 0
pd_total_percentile = 0
for i in range(len(database_name)):
df = pd.read_csv(field_name_pd[i] + '/' + loc + '_yearly_mean.csv')
df1 = pd.read_csv(field_name_pd[i] + '/' + loc + '_yearly_mean_stddev.csv')
df2 = pd.read_csv(field_name_pd[i] + '/' + loc + '_yearly_percentile.csv')
total_mean = df[field_name_pd[i]].max() if not df.empty else 0
total_mean_std = df1[field_name_pd[i]].max() if not df1.empty else 0
total_percentile = df2[field_name_pd[i]].max() if not df2.empty else 0
with open(dir_path + "\\result.txt", 'a') as f:
f.write("\nfield name(max of all years): " + str(field_name_pd[i]) + " location: " + str(loc) + " Pd method 1: " + str(total_mean))
f.write("\nfield name(max of all years): " + str(field_name_pd[i]) + " location: " + str(loc) + " Pd method 2: " + str(total_mean_std))
f.write("\nfield name(max of all years): " + str(field_name_pd[i]) + " location: " + str(loc) + " Pd method 3: " + str(total_percentile))
pd_total_mean = pd_total_mean + total_mean
pd_total_mean_std = pd_total_mean_std + total_mean_std
pd_total_percentile = pd_total_percentile + total_percentile
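        # Convert from the MERRA-2 flux units (assumed kg m-2 s-1) to mg m-2 day-1,
        # the unit used by the dose-response functions: 1 kg = 1e6 mg and 1 day = 86400 s.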
pd_total_mean = round(pd_total_mean * math.pow(10, 6) * 86400, 2)
pd_total_mean_std = round(pd_total_mean_std * math.pow(10, 6) * 86400, 2)
pd_total_percentile = round(pd_total_percentile * math.pow(10, 6) * 86400, 2)
pd_list_mean.append(pd_total_mean)
pd_list_percentile.append(pd_total_percentile)
with open(dir_path + "\\result.txt", 'a') as f:
f.write(f"\nlocation: " + str(loc) + " Pd method 1: " + str(pd_total_mean))
f.write(f"\nlocation: " + str(loc) + " Pd method 2: " + str(pd_total_mean_std))
f.write(f"\nlocation: " + str(loc) + " Pd method 3: " + str(pd_total_percentile))
# Write sd
sd_list_mean = []
sd_list_percentile = []
for loc, lat, lon in locs:
for i in range(len(database_name_sd)):
print('Cleaning and merging ' + field_name_sd[i] + ' data for ' + loc)
dfs = []
for file in os.listdir(field_name_sd[i] + '/' + loc):
if '.nc4' in file:
try:
with xr.open_mfdataset(field_name_sd[i] + '/' + loc + '/' + file, preprocess=extract_date) as df:
dfs.append(df.to_dataframe())
except:
print('Issue with file ' + file)
if dfs:
df_hourly = pd.concat(dfs)
df_hourly['time'] = df_hourly.index.get_level_values(level=2)
df_hourly.columns = [field_name_sd[i], 'date', 'time']
df_hourly[field_name_sd[i]] = df_hourly[field_name_sd[i]].apply(conversion_function)
df_hourly['date'] = pd.to_datetime(df_hourly['date'])
df_hourly.to_csv(field_name_sd[i] + '/' + loc + '_hourly.csv', header=[field_name_sd[i], 'date', 'time'], index=False)
df_hourly = pd.read_csv(field_name_sd[i] + '/' + loc + '_hourly.csv')
df_daily = df_hourly.groupby('date').agg(aggregator)
df_daily = df_daily.drop('time', axis=1)
df_daily['date'] = df_daily.index
df_daily.to_csv(field_name_sd[i] + '/' + loc + '_daily.csv', header=[field_name_sd[i], 'date'], index=False)
                df_weekly = df_daily.copy()
df_weekly['Week'] = pd.to_datetime(df_weekly['date']).apply(lambda x: x.isocalendar()[1])
df_weekly['Year'] = pd.to_datetime(df_weekly['date']).apply(lambda x: x.year)
df_weekly = df_weekly.groupby(['Year', 'Week']).agg(aggregator)
df_weekly['Year'] = df_weekly.index.get_level_values(0)
df_weekly['Week'] = df_weekly.index.get_level_values(1)
df_weekly.to_csv(field_name_sd[i] + '/' + loc + '_weekly.csv', index=False)
                df_yearly = df_daily.copy()
                df_yearly_std = df_daily.copy()
                df_yearly_percentile = df_daily.copy()
df_yearly['Year'] = pd.to_datetime(df_yearly['date']).apply(lambda x: x.year)
df_yearly_std['Year'] = pd.to_datetime(df_yearly['date']).apply(lambda x: x.year)
df_yearly_percentile['Year'] = pd.to_datetime(df_yearly['date']).apply(lambda x: x.year)
df_yearly1 = df_yearly_std.groupby(['Year']).agg(mean_plus_std).reset_index() # Reset index to handle re-indexing
df_yearly2 = df_yearly_percentile.groupby(['Year']).agg(percentile_90).reset_index() # Reset index to handle re-indexing
df_yearly = df_yearly.groupby(['Year']).agg(aggregator).reset_index() # Reset index to handle re-indexing
df_yearly.to_csv(field_name_sd[i] + '/' + loc + '_yearly_mean.csv', index=False)
df_yearly1.to_csv(field_name_sd[i] + '/' + loc + '_yearly_mean_stddev.csv', index=False)
df_yearly2.to_csv(field_name_sd[i] + '/' + loc + '_yearly_percentile.csv', index=False)
else:
with open(dir_path + "\\result.txt", 'a') as f:
f.write("\nNo valid files for " + field_name_sd[i] + " at " + loc + ", assuming zero values.")
pd.DataFrame(columns=[field_name_sd[i], 'date']).to_csv(field_name_sd[i] + '/' + loc + '_hourly.csv', index=False)
pd.DataFrame(columns=[field_name_sd[i], 'date']).to_csv(field_name_sd[i] + '/' + loc + '_daily.csv', index=False)
pd.DataFrame(columns=[field_name_sd[i], 'Year']).to_csv(field_name_sd[i] + '/' + loc + '_yearly_mean.csv', index=False)
pd.DataFrame(columns=[field_name_sd[i], 'Year']).to_csv(field_name_sd[i] + '/' + loc + '_yearly_mean_stddev.csv', index=False)
pd.DataFrame(columns=[field_name_sd[i], 'Year']).to_csv(field_name_sd[i] + '/' + loc + '_yearly_percentile.csv', index=False)
z = 0
for loc, lat, lon in locs:
sd_total_mean = 0
sd_total_mean_std = 0
sd_total_percentile = 0
for i in range(len(database_name_sd)):
df = pd.read_csv(field_name_sd[i] + '/' + loc + '_yearly_mean.csv')
df1 = pd.read_csv(field_name_sd[i] + '/' + loc + '_yearly_mean_stddev.csv')
df2 = pd.read_csv(field_name_sd[i] + '/' + loc + '_yearly_percentile.csv')
total_mean = df[field_name_sd[i]].max() if not df.empty else 0
total_mean_std = df1[field_name_sd[i]].max() if not df1.empty else 0
total_percentile = df2[field_name_sd[i]].max() if not df2.empty else 0
with open(dir_path + "\\result.txt", 'a') as f:
f.write("\nfield name(max of all years):" + str(field_name_sd[i]) + " location: " + str(loc) + " Sd method1: " + str(total_mean))
f.write("\nfield name(max of all years): " + str(field_name_sd[i]) + " location: " + str(loc) + " Sd method2: " + str(total_mean_std))
f.write("\nfield name(max of all years): " + str(field_name_sd[i]) + " location: " + str(loc) + " Sd method3: " + str(total_percentile))
sd_total_mean = sd_total_mean + total_mean
sd_total_mean_std = sd_total_mean_std + total_mean_std
sd_total_percentile = sd_total_percentile + total_percentile
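        # Same kg m-2 s-1 -> mg m-2 day-1 conversion as for Pd, followed by an extra
        # chloride contribution derived from the sea-salt surface mass concentration
        # (SSSMASS) collected above, scaled by 0.8 * 10**9.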
sd_total_mean = sd_total_mean * math.pow(10, 6) * 86400
sd_total_mean_std = sd_total_mean_std * math.pow(10, 6) * 86400
sd_total_percentile = sd_total_percentile * math.pow(10, 6) * 86400
sd_total_mean = sd_total_mean + (mass_list_percentile[z] * 0.8 * math.pow(10, 9))
sd_total_mean_std = sd_total_mean_std + (mass_list_percentile[z] * 0.8 * math.pow(10, 9))
sd_total_percentile = sd_total_percentile + (mass_list_percentile[z] * 0.8 * math.pow(10, 9))
sd_total_mean = round(sd_total_mean, 2)
sd_total_mean_std = round(sd_total_mean_std, 2)
sd_total_percentile = round(sd_total_percentile, 2)
sd_list_mean.append(sd_total_mean)
sd_list_percentile.append(sd_total_percentile)
z = z + 1
with open(dir_path + "\\result.txt", 'a') as f:
f.write(f"\nlocation: " + str(loc) + " Sd method 1: " + str(sd_total_mean))
f.write(f"\nlocation: " + str(loc) + " Sd method 2: " + str(sd_total_mean_std))
f.write(f"\nlocation: " + str(loc) + " Sd method 3: " + str(sd_total_percentile))
temp_total_list = []
rh_total_list = []
temp_dir = os.path.join(dir_path, 'Air Temperature and RH')
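    # Monthly temperature and relative humidity are read from per-location CSV files in
    # 'Air Temperature and RH' (18 header rows, then Timestamp/RH/Temperature columns)
    # and resampled to monthly means; locations without a file fall back to zeros.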
for loc in location:
temp_file_path = os.path.join(temp_dir, f'{loc}.csv')
if os.path.exists(temp_file_path):
temp_data = pd.read_csv(temp_file_path, skiprows=18)
temp_data.columns = ['Timestamp', 'RH', 'Temperature']
temp_data['Timestamp'] = pd.to_datetime(temp_data['Timestamp'])
temp_data.set_index('Timestamp', inplace=True)
temp_monthly = temp_data.resample('M').mean()
temp_total_list.append(temp_monthly['Temperature'].tolist())
rh_total_list.append(temp_monthly['RH'].tolist())
else:
print(f"File not found for location {loc}")
temp_total_list.append([0]*12)
rh_total_list.append([0]*12)
rcorr_90_percentile_list = []
rcorr_90_percentile_list_zinc=[]
rclass_steel_method3_list = []
rclass_steel_method3_list_zinc=[]
th_loss_cs_list = []
th_loss_cs_list_zinc=[]
for i, loc in enumerate(location):
        yearly_90_percentiles = []       # 90th percentile of the monthly rcorr values, per year (steel)
        yearly_90_percentiles_zinc = []  # the same for zinc
num_years = len(temp_total_list[i]) // 12 # Calculate number of years from the total months
for year in range(num_years):
monthly_rcorr = [] # List to store rcorr values for each month in the current year
monthly_rcorr_zinc=[]
for month in range(12):
idx = year * 12 + month # Calculate the index for the current month in the current year
temp = temp_total_list[i][idx]
rh = rh_total_list[i][idx]
# Temperature dependent factor
if temp <= 10:
t = 0.15 * (temp - 10)
else:
t = 0.054 * (-1) * (temp - 10)
# Temperature dependent factor
if temp <= 10:
t_zinc = 0.038 * (temp - 10)
else:
t_zinc = 0.071 * (-1) * (temp - 10)
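                # First-year corrosion rates (um/yr) from the ISO 9223-type dose-response
                # functions: Pd is the SO2 deposition and Sd the chloride deposition
                # (both mg m-2 day-1), rh the relative humidity (%), temp the air
                # temperature (deg C), and t / t_zinc the temperature terms computed above.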
# Calculate rcorr for the month
rcorr = (1.77 * pd_list_percentile[i] ** 0.52) * math.exp((0.02 * rh) + t) + \
(0.102 * sd_list_percentile[i] ** 0.62) * math.exp((0.033 * rh) + (0.04 * temp))
                rcorr_zinc = (0.0129 * pd_list_percentile[i] ** 0.44) * math.exp((0.046 * rh) + t_zinc) + \
                             (0.0175 * sd_list_percentile[i] ** 0.57) * math.exp((0.008 * rh) + (0.085 * temp))
monthly_rcorr.append(rcorr)
monthly_rcorr_zinc.append(rcorr_zinc)
# Calculate 90th percentile for the current year
rcorr_90_percentile = round(np.percentile(monthly_rcorr, 90), 2)
rcorr_90_percentile_zinc = round(np.percentile(monthly_rcorr_zinc, 90), 2)
yearly_90_percentiles.append(rcorr_90_percentile)
yearly_90_percentiles_zinc.append(rcorr_90_percentile_zinc)
# Calculate the average 90th percentile over all years
avg_90_percentile = round(np.mean(yearly_90_percentiles), 2)
avg_90_percentile_zinc = round(np.mean(yearly_90_percentiles_zinc), 2)
rclass_steel_method3 = rclass_calculation(avg_90_percentile)
rclass_steel_method3_zinc = rclass_calculation_zinc(avg_90_percentile_zinc)
th_loss_cs = round(thickness_loss(lifetime[i], 0.523, avg_90_percentile), 2)
th_loss_cs_zinc = round(thickness_loss(lifetime[i], 0.813, avg_90_percentile_zinc), 2)
# Store the results
rcorr_90_percentile_list.append(avg_90_percentile)
rcorr_90_percentile_list_zinc.append(avg_90_percentile_zinc)
rclass_steel_method3_list.append(rclass_steel_method3)
rclass_steel_method3_list_zinc.append(rclass_steel_method3_zinc)
th_loss_cs_list.append(th_loss_cs)
th_loss_cs_list_zinc.append(th_loss_cs_zinc)
# Print or log the results
print_results(loc, temp_total_list[i], rh_total_list[i], lifetime[i], pd_list_percentile[i], sd_list_percentile[i],
"Steel", rclass_steel_method3, avg_90_percentile, th_loss_cs)
        print_results(loc, temp_total_list[i], rh_total_list[i], lifetime[i], pd_list_percentile[i], sd_list_percentile[i],
                      "Zinc", rclass_steel_method3_zinc, avg_90_percentile_zinc, th_loss_cs_zinc)
'''
for i, loc in enumerate(location):
row_cells = table.add_row().cells
row_cells[0].text = loc
row_cells[1].text = str(temp_total_list[i])
row_cells[2].text = str(rh_total_list[i])
row_cells[3].text = str(rcorr_90_percentile_list[i])
row_cells[4].text = str(rcorr_90_percentile_list_zinc[i])
row_cells[5].text = rclass_steel_method3_list[i]
row_cells[6].text = rclass_steel_method3_list_zinc[i]
row_cells[7].text = str(th_loss_cs_list[i])
row_cells[8].text = str(th_loss_cs_list_zinc[i])
doc.save('rcorr.docx')
'''