Energy Analysis¶

Visualization and analysis of simulated building energy data

  • One of the biggest shopping malls in the world
  • Located in the United Arab Emirates (i.e., an arid climate)
  • Floor area of about 500,000 m2
  • Around 90 million visitors per year
  • A building with a tremendously high cooling energy demand

We will begin with visualizing some EnergyPlus simulation model data.

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib

Monthly Energy Visualization¶

Let's look at the energy consumption per month:

  • The baseline data comes from a simulation model with conventional design options.
In [2]:
base_data = pd.read_csv('./simulation_data/Baseline.csv', index_col='Month')
In [3]:
base_data.head()
Out[3]:
Baseline
Month
January 5.69
February 6.75
March 10.64
April 13.60
May 19.34
In [4]:
base_data.plot()
Out[4]:
<Axes: xlabel='Month'>
In [5]:
base_data.columns.name='Scenario'
base_data.plot(lw=2, colormap='jet', marker='.', markersize=10, 
        title='Cooling Energy Consumption of Baseline Building in Total GWh')
Out[5]:
<Axes: title={'center': 'Cooling Energy Consumption of Baseline Building in Total GWh'}, xlabel='Month'>

Different Design Option Scenarios¶

Baseline vs. different scheduling

  • First, let's take a look at one of the design options: optimized schedules. Instead of operating 24/7, the building would have its cooling system on only during operating hours.
In [6]:
Aircon = pd.read_csv("./simulation_data/Scenario - Aircon Schedules.csv", index_col="Month")
In [7]:
Aircon
Out[7]:
Scenario - Aircon Schedules
Month
January 5.61
February 6.50
March 9.70
April 11.95
May 16.52
June 18.89
July 22.13
August 22.14
September 20.38
October 15.87
November 11.71
December 7.16
In [8]:
base_data
Out[8]:
Scenario Baseline
Month
January 5.69
February 6.75
March 10.64
April 13.60
May 19.34
June 22.64
July 27.02
August 27.13
September 25.04
October 18.47
November 13.02
December 7.53

Combine both datasets

In [9]:
combined_data = pd.concat([base_data, Aircon], axis=1)
In [10]:
combined_data
Out[10]:
Baseline Scenario - Aircon Schedules
Month
January 5.69 5.61
February 6.75 6.50
March 10.64 9.70
April 13.60 11.95
May 19.34 16.52
June 22.64 18.89
July 27.02 22.13
August 27.13 22.14
September 25.04 20.38
October 18.47 15.87
November 13.02 11.71
December 7.53 7.16
In [11]:
combined_data.plot(lw=2, colormap='jet', marker="*", markersize=10,
                  title="Cooling Energy Consumption of Base Model in Total GWh")
Out[11]:
<Axes: title={'center': 'Cooling Energy Consumption of Base Model in Total GWh'}, xlabel='Month'>
In [12]:
combined_data['Difference'] = combined_data['Baseline'] - combined_data['Scenario - Aircon Schedules']
In [13]:
combined_data
Out[13]:
Baseline Scenario - Aircon Schedules Difference
Month
January 5.69 5.61 0.08
February 6.75 6.50 0.25
March 10.64 9.70 0.94
April 13.60 11.95 1.65
May 19.34 16.52 2.82
June 22.64 18.89 3.75
July 27.02 22.13 4.89
August 27.13 22.14 4.99
September 25.04 20.38 4.66
October 18.47 15.87 2.60
November 13.02 11.71 1.31
December 7.53 7.16 0.37
In [14]:
combined_data['Difference'].plot.bar(title='Difference between Base Schedule and Improved Schedule in Total GWh')
Out[14]:
<Axes: title={'center': 'Difference between Base Schedule and Improved Schedule in Total GWh'}, xlabel='Month'>

Comparing all of the options¶

  • Glazing
  • Rooftop gardens
  • Thermal Comfort
  • Cool paint on the roof
In [15]:
list_of_files = ['Scenario - Aircon Schedules.csv',
 'Scenario - Cool roof.csv',
 'Scenario - Rooftop Gardens.csv',
 'Scenario - Increase Setpoint.csv',
 'Scenario - Low-E Glass.csv',
 'Baseline.csv']
In [16]:
data_container = []
for file_name in list_of_files:
    df = pd.read_csv("./simulation_data/" + file_name, index_col='Month')
    data_container.append(df)
    
merged = pd.concat(data_container, axis=1)
In [17]:
merged
Out[17]:
Scenario - Aircon Schedules Scenario - Cool roof Scenario - Rooftop Gardens Scenario - Increase Setpoint Scenario - Low-E Glass Baseline
Month
January 5.61 4.46 6.30 2.73 5.41 5.69
February 6.50 5.39 7.15 3.88 6.48 6.75
March 9.70 8.96 10.90 6.60 10.37 10.64
April 11.95 11.73 13.59 9.37 13.36 13.60
May 16.52 17.28 18.94 14.82 19.14 19.34
June 18.89 20.54 22.12 18.01 22.47 22.64
July 22.13 24.76 26.29 21.98 26.84 27.02
August 22.14 24.97 26.47 22.15 26.91 27.13
September 20.38 22.98 24.63 19.92 24.77 25.04
October 15.87 16.57 18.51 13.65 18.16 18.47
November 11.71 11.41 13.55 8.49 12.69 13.02
December 7.16 6.36 8.19 4.15 7.24 7.53
In [18]:
merged.info()
<class 'pandas.core.frame.DataFrame'>
Index: 12 entries, January to December
Data columns (total 6 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Scenario - Aircon Schedules   12 non-null     float64
 1   Scenario - Cool roof          12 non-null     float64
 2   Scenario - Rooftop Gardens    12 non-null     float64
 3   Scenario - Increase Setpoint  12 non-null     float64
 4   Scenario - Low-E Glass        12 non-null     float64
 5   Baseline                      12 non-null     float64
dtypes: float64(6)
memory usage: 672.0+ bytes
In [19]:
merged.plot(lw=2, marker='.', markersize=10, 
        title='Cooling Energy Consumption of Baseline Building in Total GWh', figsize=(15,8))
Out[19]:
<Axes: title={'center': 'Cooling Energy Consumption of Baseline Building in Total GWh'}, xlabel='Month'>

Let's see the total energy consumption change of each design option relative to the baseline (negative bars indicate savings):

In [20]:
merged
Out[20]:
Scenario - Aircon Schedules Scenario - Cool roof Scenario - Rooftop Gardens Scenario - Increase Setpoint Scenario - Low-E Glass Baseline
Month
January 5.61 4.46 6.30 2.73 5.41 5.69
February 6.50 5.39 7.15 3.88 6.48 6.75
March 9.70 8.96 10.90 6.60 10.37 10.64
April 11.95 11.73 13.59 9.37 13.36 13.60
May 16.52 17.28 18.94 14.82 19.14 19.34
June 18.89 20.54 22.12 18.01 22.47 22.64
July 22.13 24.76 26.29 21.98 26.84 27.02
August 22.14 24.97 26.47 22.15 26.91 27.13
September 20.38 22.98 24.63 19.92 24.77 25.04
October 15.87 16.57 18.51 13.65 18.16 18.47
November 11.71 11.41 13.55 8.49 12.69 13.02
December 7.16 6.36 8.19 4.15 7.24 7.53
In [21]:
merged.subtract(merged['Baseline'], axis=0).sum().plot.bar(color='green')
Out[21]:
<Axes: >

Time Series Data Analysis - Meter Data Analysis¶

For this section, we will be using the Building Data Genome Project:

https://github.com/buds-lab/the-building-data-genome-project


  • This project is a collection of 500+ time-series meter data sets from buildings -- check out the website, which includes an overview of the data set, sources, and publications that use it.

  • We will use some of the buildings from this data set to see why pandas is well suited to time-series data from IoT networks.

Let's load the data for a single building from the meter_data folder; the building's name is Amelia.

In [22]:
amelia = pd.read_csv('./meter_data/Office_Amelia.csv', index_col='timestamp')
In [23]:
amelia.head()
Out[23]:
Office_Amelia
timestamp
2015-01-01 00:00:00 3.96
2015-01-01 01:00:00 4.44
2015-01-01 02:00:00 4.82
2015-01-01 03:00:00 4.28
2015-01-01 04:00:00 18.79
In [24]:
amelia.info()
<class 'pandas.core.frame.DataFrame'>
Index: 8760 entries, 2015-01-01 00:00:00 to 2015-12-31 23:00:00
Data columns (total 1 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Office_Amelia  8760 non-null   float64
dtypes: float64(1)
memory usage: 136.9+ KB
In [25]:
amelia.index[0]
Out[25]:
'2015-01-01 00:00:00'

Convert timestamp index to a datetime object¶

In [26]:
amelia = pd.read_csv('./meter_data/Office_Amelia.csv', index_col='timestamp', parse_dates=True)
In [27]:
amelia.head()
Out[27]:
Office_Amelia
timestamp
2015-01-01 00:00:00 3.96
2015-01-01 01:00:00 4.44
2015-01-01 02:00:00 4.82
2015-01-01 03:00:00 4.28
2015-01-01 04:00:00 18.79
In [28]:
amelia.index[0]
Out[28]:
Timestamp('2015-01-01 00:00:00')

Timestamp is the pandas equivalent of Python's datetime.
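For example, a minimal interoperability check using the index we just parsed:

from datetime import datetime

ts = amelia.index[0]          # pandas Timestamp('2015-01-01 00:00:00')
ts.to_pydatetime()            # converts to datetime.datetime(2015, 1, 1, 0, 0)
ts == datetime(2015, 1, 1)    # True: Timestamps compare directly with datetimes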

Plot Simple Charts of time-series data¶

In [29]:
amelia.plot()
Out[29]:
<Axes: xlabel='timestamp'>
In [30]:
amelia.plot(marker=".", linestyle='None', alpha=0.5, figsize=(15, 5))
Out[30]:
<Axes: xlabel='timestamp'>

Resample the data¶

Two types of resampling are:

  • Upsampling: where you increase the frequency of the samples, such as from minutes to seconds (see the sketch after this list).

  • Downsampling: where you decrease the frequency of the samples, such as from days to months.

  • We will focus here on downsampling, exploring how it can help us analyze our meter data on various time scales.
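Although we won't need it below, here is a minimal upsampling sketch for contrast (hourly readings to 15-minute readings, with linear interpolation to fill the newly created rows; the amelia_15min name is just for illustration):

amelia_15min = amelia.resample('15T').asfreq().interpolate(method='linear')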

In [31]:
amelia.head()
Out[31]:
Office_Amelia
timestamp
2015-01-01 00:00:00 3.96
2015-01-01 01:00:00 4.44
2015-01-01 02:00:00 4.82
2015-01-01 03:00:00 4.28
2015-01-01 04:00:00 18.79
In [32]:
amelia_daily = amelia.resample("D").mean(numeric_only=True)
In [33]:
amelia_daily.head()
Out[33]:
Office_Amelia
timestamp
2015-01-01 8.323750
2015-01-02 7.865417
2015-01-03 3.695417
2015-01-04 3.680417
2015-01-05 8.948333
In [34]:
amelia_daily.plot(figsize=(10, 5))
Out[34]:
<Axes: xlabel='timestamp'>
In [35]:
amelia_daily.resample('M').mean().plot(figsize=(10, 5))
Out[35]:
<Axes: xlabel='timestamp'>

Analysis of a large number of buildings at once¶

We will select a few of the buildings from the data set:

In [36]:
list_of_buildings = [
    'UnivClass_Andy.csv',
    'Office_Alannah.csv',
    'PrimClass_Angel.csv',
    'Office_Penny.csv',
    'UnivLab_Allison.csv',
    'Office_Amelia.csv',
    'UnivClass_Alfredo.csv',
    'Office_Phebian.csv',
    'UnivLab_Adrian.csv',
    'UnivDorm_Curtis.csv',
    'UnivLab_Angie.csv',
    'UnivClass_Amya.csv',
    'UnivLab_Audra.csv',
]
In [37]:
data_container_list = []
for building_name in list_of_buildings:
    df = pd.read_csv("./meter_data/" + building_name, index_col='timestamp', parse_dates=True)
    df = df.resample("D").mean()
    data_container_list.append(df)
all_merged = pd.concat(data_container_list, axis=1)
In [38]:
all_merged.plot(figsize=(20, 30), subplots=True)
Out[38]:
array([<Axes: xlabel='timestamp'>, <Axes: xlabel='timestamp'>,
       <Axes: xlabel='timestamp'>, <Axes: xlabel='timestamp'>,
       <Axes: xlabel='timestamp'>, <Axes: xlabel='timestamp'>,
       <Axes: xlabel='timestamp'>, <Axes: xlabel='timestamp'>,
       <Axes: xlabel='timestamp'>, <Axes: xlabel='timestamp'>,
       <Axes: xlabel='timestamp'>, <Axes: xlabel='timestamp'>,
       <Axes: xlabel='timestamp'>], dtype=object)
In [39]:
all_merged.resample("M").mean().plot(figsize=(20,30), subplots=True);

Compare consumption of buildings to each other¶

  • Buildings come in all shapes, sizes, and uses, so it is difficult to compare the energy consumption of two buildings that differ in these ways.
  • We will therefore normalize the buildings' energy consumption data so that we can compare them to each other.
In [40]:
# Here we will look at a different building
building_name = "Office_Penny"
raw_data = pd.read_csv("./meter_data/"+building_name+".csv", parse_dates=True, index_col='timestamp')
In [41]:
raw_data.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 8760 entries, 2015-01-01 00:00:00 to 2015-12-31 23:00:00
Data columns (total 1 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Office_Penny  8760 non-null   float64
dtypes: float64(1)
memory usage: 136.9 KB
In [42]:
raw_data.plot(figsize=(20, 8))
Out[42]:
<Axes: xlabel='timestamp'>

Normalize by floor area¶

  • EUI - Energy Use Intensity - this metric simply divides energy use by the floor area (in ft2 or m2); see the worked example below.
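A quick worked example with hypothetical numbers: a building using 300,000 kWh per year over 3,000 m2 of floor area has an EUI of 100 kWh/m2 per year.

annual_kwh = 300_000              # hypothetical annual consumption (kWh)
floor_area_m2 = 3_000             # hypothetical floor area (m2)
eui = annual_kwh / floor_area_m2  # 100.0 kWh/m2 per year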
In [43]:
metadata = pd.read_csv('./all_buildings_meta_data.csv', index_col="uid")
In [44]:
metadata.head()
Out[44]:
dataend datastart energystarscore heatingtype industry mainheatingtype numberoffloors occupants primaryspaceusage rating sqft sqm subindustry timezone yearbuilt nickname primaryspaceuse_abbrev newweatherfilename
uid
PrimClass_Everett 31/12/12 23:00 01/01/12 00:00 NaN NaN Education NaN NaN NaN Primary/Secondary Classroom NaN 105530.0 9804.053590 Primary/Secondary School America/New_York NaN Everett PrimClass weather12.csv
UnivClass_Clifford 31/12/15 23:00 01/01/15 00:00 NaN NaN Education NaN NaN NaN College Classroom NaN 56969.0 5292.591007 College/University America/New_York 1967 Clifford UnivClass weather2.csv
Office_Elizabeth 31/12/12 23:00 01/01/12 00:00 NaN NaN Commercial Property NaN NaN NaN Office NaN 294651.0 27373.961850 Commercial Real Estate America/Los_Angeles NaN Elizabeth Office weather22.csv
Office_Ellie 31/12/12 23:00 01/01/12 00:00 NaN NaN Commercial Property NaN NaN NaN Office NaN 496517.0 46127.918850 Bank/Financial Services America/Los_Angeles NaN Ellie Office weather28.csv
PrimClass_Elisabeth 31/12/12 23:00 01/01/12 00:00 NaN NaN Education NaN NaN NaN Primary/Secondary Classroom NaN 233062.0 21652.158990 Primary/Secondary School America/New_York NaN Elisabeth PrimClass weather23.csv
In [45]:
metadata.loc[building_name]
Out[45]:
dataend                       31/12/15 23:00
datastart                     01/01/15 00:00
energystarscore                          NaN
heatingtype                              NaN
industry                           Education
mainheatingtype                          NaN
numberoffloors                           NaN
occupants                                NaN
primaryspaceusage                     Office
rating                                   NaN
sqft                                 37485.0
sqm                              3482.468955
subindustry               College/University
timezone                    America/New_York
yearbuilt                                NaN
nickname                               Penny
primaryspaceuse_abbrev                Office
newweatherfilename              weather4.csv
Name: Office_Penny, dtype: object
In [46]:
sqm = metadata.loc[building_name]["sqm"]
sqm
Out[46]:
3482.468955
In [47]:
raw_data.head()
Out[47]:
Office_Penny
timestamp
2015-01-01 00:00:00 25.275000
2015-01-01 01:00:00 25.625000
2015-01-01 02:00:00 25.158333
2015-01-01 03:00:00 24.900000
2015-01-01 04:00:00 26.358333
In [48]:
normalized_rawdata = raw_data / sqm
In [49]:
normalized_rawdata.head()
Out[49]:
Office_Penny
timestamp
2015-01-01 00:00:00 0.007258
2015-01-01 01:00:00 0.007358
2015-01-01 02:00:00 0.007224
2015-01-01 03:00:00 0.007150
2015-01-01 04:00:00 0.007569
In [50]:
monthly_normalized = normalized_rawdata.resample('M').sum()
In [51]:
monthly_normalized.plot(kind="bar", figsize=(10,4), title='Energy Consumption per Square Meter Floor Area')
plt.ylabel("Kwh/m2")
Out[51]:
Text(0, 0.5, 'Kwh/m2')

Analysis of multiple buildings¶

In [52]:
buildingnamelist = [
    "Office_Abbey",
    "Office_Pam",
    "Office_Penny",
    "UnivLab_Allison",
    "UnivLab_Audra",
    "UnivLab_Ciel"]
In [53]:
annual_data_list = []
annual_data_list_normalized = []
In [54]:
for buildingname in buildingnamelist:
    print("Getting data from: "+buildingname)
    
    rawdata = pd.read_csv("./meter_data/"+buildingname+".csv", parse_dates=True, index_col='timestamp')
    floor_area = metadata.loc[buildingname]["sqm"]
    
    annual = rawdata.sum()

    normalized_data = rawdata/floor_area
    annual_normalized = normalized_data.sum()
    
    annual_data_list_normalized.append(annual_normalized)
    annual_data_list.append(annual) 
Getting data from: Office_Abbey
Getting data from: Office_Pam
Getting data from: Office_Penny
Getting data from: UnivLab_Allison
Getting data from: UnivLab_Audra
Getting data from: UnivLab_Ciel
In [55]:
totaldata = pd.concat(annual_data_list)
totaldata_normalized = pd.concat(annual_data_list_normalized)
In [56]:
totaldata
Out[56]:
Office_Abbey       4.500047e+05
Office_Pam         9.239849e+05
Office_Penny       3.167204e+05
UnivLab_Allison    1.614309e+05
UnivLab_Audra      1.466867e+05
UnivLab_Ciel       3.122984e+06
dtype: float64
In [57]:
totaldata_normalized
Out[57]:
Office_Abbey        65.298087
Office_Pam          95.022257
Office_Penny        90.947091
UnivLab_Allison    370.575397
UnivLab_Audra      344.517297
UnivLab_Ciel       143.740305
dtype: float64

Unnormalized energy consumption¶

As we can see from the annual totals of energy consumption, the lab named Ciel uses a lot of energy compared to the other buildings!

In [58]:
totaldata.plot(kind='bar',figsize=(10,5))
Out[58]:
<Axes: >

Normalized Energy Consumption¶

But when normalized by floor area, Ciel is not the highest consumer.

In [59]:
totaldata_normalized.plot(kind='bar',figsize=(10,5))
Out[59]:
<Axes: >

Weather Influence on Energy Consumption¶

In [60]:
%matplotlib inline
In [61]:
ciara_rawdata = pd.read_csv("./meter_data/UnivClass_Ciara.csv", parse_dates=True, index_col='timestamp')
In [62]:
ciara_rawdata.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 8760 entries, 2015-01-01 00:00:00 to 2015-12-31 23:00:00
Data columns (total 1 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   UnivClass_Ciara  8760 non-null   float64
dtypes: float64(1)
memory usage: 136.9 KB
In [63]:
ciara_rawdata.plot(figsize=(10,4))
Out[63]:
<Axes: xlabel='timestamp'>
In [64]:
metadata.loc['UnivClass_Ciara']
Out[64]:
dataend                       31/12/15 23:00
datastart                     01/01/15 00:00
energystarscore                          NaN
heatingtype                              NaN
industry                           Education
mainheatingtype                          NaN
numberoffloors                           NaN
occupants                                NaN
primaryspaceusage          College Classroom
rating                                   NaN
sqft                                140488.0
sqm                              13051.75666
subindustry               College/University
timezone                    America/New_York
yearbuilt                               1961
nickname                               Ciara
primaryspaceuse_abbrev             UnivClass
newweatherfilename              weather2.csv
Name: UnivClass_Ciara, dtype: object

For the university classroom building "UnivClass_Ciara", the weather file is weather2.csv, as we can see from the metadata.

In [65]:
weather_data_ciara = pd.read_csv("./weather_data/weather2.csv", index_col='timestamp', parse_dates=True)
In [66]:
weather_data_ciara.head()
Out[66]:
Conditions DateUTC<br /> Dew PointC Events Gust SpeedKm/h Humidity Precipitationmm Sea Level PressurehPa TemperatureC TimeEDT TimeEST VisibilityKm Wind Direction Wind SpeedKm/h WindDirDegrees timestamp.1
timestamp
2015-01-01 00:56:00 Overcast 2015-01-01 05:56:00<br /> -15.0 NaN - 54.0 NaN 1017.4 -7.2 NaN 12:56 AM 16.1 SW 18.5 230 2015-01-01 00:56:00
2015-01-01 01:56:00 Overcast 2015-01-01 06:56:00<br /> -14.4 NaN - 55.0 NaN 1017.2 -6.6 NaN 1:56 AM 16.1 WSW 14.8 240 2015-01-01 01:56:00
2015-01-01 02:56:00 Overcast 2015-01-01 07:56:00<br /> -14.4 NaN - 55.0 NaN 1016.9 -6.6 NaN 2:56 AM 16.1 SW 14.8 220 2015-01-01 02:56:00
2015-01-01 03:56:00 Overcast 2015-01-01 08:56:00<br /> -14.4 NaN - 55.0 NaN 1017.2 -6.6 NaN 3:56 AM 16.1 SW 18.5 220 2015-01-01 03:56:00
2015-01-01 04:56:00 Overcast 2015-01-01 09:56:00<br /> -14.4 NaN - 55.0 NaN 1017.2 -6.6 NaN 4:56 AM 16.1 SSW 14.8 210 2015-01-01 04:56:00
In [67]:
weather_data_ciara.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 10914 entries, 2015-01-01 00:56:00 to 2015-12-31 21:56:00
Data columns (total 16 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Conditions             10914 non-null  object 
 1   DateUTC<br />          10914 non-null  object 
 2   Dew PointC             10914 non-null  float64
 3   Events                 2555 non-null   object 
 4   Gust SpeedKm/h         10914 non-null  object 
 5   Humidity               10878 non-null  float64
 6   Precipitationmm        2945 non-null   float64
 7   Sea Level PressurehPa  10914 non-null  float64
 8   TemperatureC           10914 non-null  float64
 9   TimeEDT                7104 non-null   object 
 10  TimeEST                3810 non-null   object 
 11  VisibilityKm           10914 non-null  float64
 12  Wind Direction         10914 non-null  object 
 13  Wind SpeedKm/h         10914 non-null  object 
 14  WindDirDegrees         10914 non-null  int64  
 15  timestamp.1            10914 non-null  object 
dtypes: float64(6), int64(1), object(9)
memory usage: 1.4+ MB
In [68]:
weather_data_ciara['TemperatureC'].plot(figsize=(10, 4))
Out[68]:
<Axes: xlabel='timestamp'>

Looks like there are outliers in the data set: some readings are physically impossible (sensor errors or missing-data sentinel values), so we will filter them out.

In [69]:
weather_hourly = weather_data_ciara.resample('H').mean(numeric_only=True)
In [70]:
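# elementwise mask: readings of -40 degC or below (implausible sensor values) become NaN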
weather_hourly_nooutlier = weather_hourly[weather_hourly > -40]
In [71]:
weather_hourly['TemperatureC'].info()
<class 'pandas.core.series.Series'>
DatetimeIndex: 8758 entries, 2015-01-01 00:00:00 to 2015-12-31 21:00:00
Freq: H
Series name: TemperatureC
Non-Null Count  Dtype  
--------------  -----  
8567 non-null   float64
dtypes: float64(1)
memory usage: 136.8 KB
In [72]:
weather_hourly_nooutlier['TemperatureC'].info()
<class 'pandas.core.series.Series'>
DatetimeIndex: 8758 entries, 2015-01-01 00:00:00 to 2015-12-31 21:00:00
Freq: H
Series name: TemperatureC
Non-Null Count  Dtype  
--------------  -----  
8544 non-null   float64
dtypes: float64(1)
memory usage: 136.8 KB
In [73]:
weather_hourly_nooutlier['TemperatureC'].plot(figsize=(10, 4), color='red')
Out[73]:
<Axes: xlabel='timestamp'>

Filling gaps in data¶

We can fill the gaps left by filtering out the outliers by using the .fillna() function.
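As a minimal illustration with made-up values, forward-filling propagates the last valid observation into each gap:

s = pd.Series([1.0, None, None, 4.0])
s.fillna(method='ffill')   # -> 1.0, 1.0, 1.0, 4.0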

In [74]:
weather_nooutlier_nogaps = weather_hourly_nooutlier.fillna(method='ffill')
In [75]:
weather_nooutlier_nogaps["TemperatureC"].plot(figsize=(10,4))
Out[75]:
<Axes: xlabel='timestamp'>

Combine Temperature and Electricity data¶

In [76]:
weather_nooutlier_nogaps['TemperatureC'].head()
Out[76]:
timestamp
2015-01-01 00:00:00   -7.2
2015-01-01 01:00:00   -6.6
2015-01-01 02:00:00   -6.6
2015-01-01 03:00:00   -6.6
2015-01-01 04:00:00   -6.6
Freq: H, Name: TemperatureC, dtype: float64
In [77]:
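# keep only the first row for any duplicated timestamp so the meter and weather indexes align cleanly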
rawdata = ciara_rawdata[~ciara_rawdata.index.duplicated(keep='first')]

Using the .concat() and .merge() functions to combine the datasets

In [78]:
combined = pd.concat([weather_nooutlier_nogaps['TemperatureC'], rawdata['UnivClass_Ciara']], axis=1)
In [79]:
merged = pd.merge(weather_nooutlier_nogaps['TemperatureC'], rawdata['UnivClass_Ciara'], left_index=True, right_index=True, how='outer')
In [80]:
merged.head()
Out[80]:
TemperatureC UnivClass_Ciara
timestamp
2015-01-01 00:00:00 -7.2 136.824997
2015-01-01 01:00:00 -6.6 144.025002
2015-01-01 02:00:00 -6.6 144.875000
2015-01-01 03:00:00 -6.6 142.375000
2015-01-01 04:00:00 -6.6 148.199997
In [81]:
merged.plot(figsize=(20, 10), subplots=True)
Out[81]:
array([<Axes: xlabel='timestamp'>, <Axes: xlabel='timestamp'>],
      dtype=object)

Analyze the weather influence on energy consumption¶

In [82]:
merged.plot(kind='scatter', x='TemperatureC', y='UnivClass_Ciara', figsize=(10,10), color='pink')
Out[82]:
<Axes: xlabel='TemperatureC', ylabel='UnivClass_Ciara'>
In [83]:
merged.resample("D").mean().plot(kind='scatter', x='TemperatureC', y='UnivClass_Ciara', figsize=(10,10), color='green')
Out[83]:
<Axes: xlabel='TemperatureC', ylabel='UnivClass_Ciara'>

Visualizations using Seaborn¶

In [84]:
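# label each day as a likely heating day (cold) or cooling day (warm), split at 10 degC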
def make_color_division(x):
    if x < 10:
        return "Heating"
    else:
        return "Cooling"
In [85]:
combined = merged.resample("D").mean()
In [86]:
combined.head()
Out[86]:
TemperatureC UnivClass_Ciara
timestamp
2015-01-01 -3.027083 149.362500
2015-01-02 -1.704167 151.513542
2015-01-03 -1.707639 147.725000
2015-01-04 7.135417 148.089584
2015-01-05 -7.598958 163.968750
In [87]:
combined['heating_vs_cooling'] = combined.TemperatureC.apply(lambda x: make_color_division(x))
In [88]:
combined.head()
Out[88]:
TemperatureC UnivClass_Ciara heating_vs_cooling
timestamp
2015-01-01 -3.027083 149.362500 Heating
2015-01-02 -1.704167 151.513542 Heating
2015-01-03 -1.707639 147.725000 Heating
2015-01-04 7.135417 148.089584 Heating
2015-01-05 -7.598958 163.968750 Heating
In [89]:
g = sns.lmplot(x="TemperatureC", y="UnivClass_Ciara", hue="heating_vs_cooling",
               truncate=True, data=combined, palette='Set2')

g.set_axis_labels("Outdoor Air Temperature", "Average Hourly kWh")
Out[89]:
<seaborn.axisgrid.FacetGrid at 0x22ac32d8ca0>

Indoor Environmental Quality Analysis¶

  • For this section we will use the ASHRAE Thermal Comfort Database II.

The publication that outlines how the database was created is here: https://www.researchgate.net/publication/325848721_Development_of_the_ASHRAE_Global_Thermal_Comfort_Database_II

In [90]:
ashrae_data = pd.read_csv("./ashrae_thermal_comfort_database_2.csv", index_col='Unnamed: 0')
In [91]:
ashrae_data.head()
Out[91]:
Publication (Citation) Year Season Climate City Country Building type Cooling startegy_building level Sex Thermal sensation Clo Met Air temperature (C) Relative humidity (%) Air velocity (m/s) ThermalSensation_rounded
2233 Indraganti, Madhavi, et al. "Adaptive model of... 2012.0 Winter Tropical wet savanna Chennai India Office Mixed Mode Female -2.0 0.75 1.0 25.2 64.0 0.1 -2.0
2234 Indraganti, Madhavi, et al. "Adaptive model of... 2012.0 Winter Tropical wet savanna Chennai India Office Mixed Mode Male -1.0 0.64 1.0 25.2 64.0 0.1 -1.0
2235 Indraganti, Madhavi, et al. "Adaptive model of... 2012.0 Winter Tropical wet savanna Chennai India Office Mixed Mode Male -2.0 0.64 1.0 25.2 64.0 0.1 -2.0
2236 Indraganti, Madhavi, et al. "Adaptive model of... 2012.0 Winter Tropical wet savanna Chennai India Office Mixed Mode Female -2.0 0.75 1.0 25.2 64.0 0.1 -2.0
2237 Indraganti, Madhavi, et al. "Adaptive model of... 2012.0 Winter Tropical wet savanna Chennai India Office Mixed Mode Male 0.0 0.72 1.0 25.2 64.0 0.1 0.0

Summarizing Data¶

In [92]:
ashrae_data.describe()
Out[92]:
Year Thermal sensation Clo Met Air temperature (C) Relative humidity (%) Air velocity (m/s) ThermalSensation_rounded
count 43448.000000 43448.000000 43448.000000 43448.000000 43448.000000 43448.000000 43448.000000 43448.000000
mean 2003.163621 0.127440 0.634063 1.177975 25.215932 51.553517 0.202853 0.128222
std 10.326515 1.125046 0.255987 0.251705 3.426420 14.709984 0.317471 1.127096
min 1982.000000 -3.000000 0.000000 0.700000 13.100000 0.500000 0.000000 -3.000000
25% 1994.000000 -0.500000 0.470000 1.000000 22.900000 41.400000 0.060000 0.000000
50% 2010.000000 0.000000 0.620000 1.100000 24.700000 51.600000 0.100000 0.000000
75% 2012.000000 1.000000 0.720000 1.200000 27.200000 62.400000 0.210000 1.000000
max 2016.000000 3.000000 2.870000 6.830000 48.800000 97.800000 6.540000 3.000000
In [93]:
ashrae_data['Clo'].sum()
Out[93]:
27548.78
In [94]:
ashrae_data['Clo'].mean()
Out[94]:
0.6340632480206223
In [95]:
ashrae_data['Clo'].std()
Out[95]:
0.25598709252418184

Understanding the diversity of data¶

This data contains many categorical columns.

In [96]:
ashrae_data.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 43448 entries, 2233 to 104033
Data columns (total 16 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Publication (Citation)           43448 non-null  object 
 1   Year                             43448 non-null  float64
 2   Season                           43448 non-null  object 
 3   Climate                          43448 non-null  object 
 4   City                             43448 non-null  object 
 5   Country                          43448 non-null  object 
 6   Building type                    43448 non-null  object 
 7   Cooling startegy_building level  43448 non-null  object 
 8   Sex                              43448 non-null  object 
 9   Thermal sensation                43448 non-null  float64
 10  Clo                              43448 non-null  float64
 11  Met                              43448 non-null  float64
 12  Air temperature (C)              43448 non-null  float64
 13  Relative humidity (%)            43448 non-null  float64
 14  Air velocity (m/s)               43448 non-null  float64
 15  ThermalSensation_rounded         43448 non-null  float64
dtypes: float64(8), object(8)
memory usage: 5.6+ MB
In [97]:
ashrae_data['Country'].nunique()
Out[97]:
17
In [98]:
ashrae_data['Country'].value_counts()
Out[98]:
India          15191
Brazil          7074
Australia       6940
USA             2950
UK              2056
Portugal        1558
Greece          1283
Thailand        1146
Sweden           940
Malaysia         869
Canada           867
Singapore        812
France           465
Germany          414
Iran             377
Philippines      277
Italy            229
Name: Country, dtype: int64
In [99]:
ashrae_data['Building type'].value_counts()
Out[99]:
Office                 29913
Classroom               6042
Others                  4615
Multifamily housing     2369
Senior center            509
Name: Building type, dtype: int64
In [100]:
ashrae_data['Cooling startegy_building level'].value_counts()
Out[100]:
Mixed Mode                 16280
Air Conditioned            14051
Naturally Ventilated       12952
Mechanically Ventilated      165
Name: Cooling startegy_building level, dtype: int64

Grouping data¶

In [101]:
ashrae_data.groupby("Country").mean(numeric_only=True)
Out[101]:
Year Thermal sensation Clo Met Air temperature (C) Relative humidity (%) Air velocity (m/s) ThermalSensation_rounded
Country
Australia 1993.472190 0.133530 0.580548 1.218186 23.747651 50.347320 0.140030 0.133141
Brazil 2010.000000 0.141787 0.527037 1.088762 25.369211 64.132627 0.299327 0.141787
Canada 1994.491349 -0.280277 0.721569 1.211073 23.475663 33.447174 0.086367 -0.280277
France 1998.554839 0.436559 0.749505 1.375484 24.155484 38.222151 0.234301 0.436559
Germany 2005.000000 0.483092 0.560676 1.449034 24.757729 48.612560 1.113116 0.483092
Greece 1994.552611 0.524552 0.470966 1.243024 29.038270 35.262120 0.336633 0.524552
India 2011.875518 0.058930 0.704502 1.122783 26.440109 49.166757 0.210093 0.060891
Iran 1999.000000 -0.188329 1.484987 1.517507 20.019363 61.959947 0.031353 -0.188329
Italy 2008.000000 0.314410 0.624061 1.290830 25.455022 34.427948 0.589651 0.314410
Malaysia 2007.000000 0.249712 0.266329 1.033257 30.737745 70.929919 0.262025 0.249712
Philippines 2003.000000 -1.036101 0.648664 1.282671 23.755596 47.368592 0.144874 -1.036101
Portugal 1998.607189 0.146341 0.808472 1.474390 22.851540 51.055135 0.075597 0.146341
Singapore 1986.717980 0.376478 0.288805 1.178017 27.559606 68.439039 0.187525 0.381773
Sweden 1998.535106 0.017021 0.782606 1.377021 22.904255 36.141064 0.027415 0.017021
Thailand 1988.000000 0.372600 0.548857 1.100000 26.065707 63.890314 0.191902 0.370855
UK 1998.376946 0.223249 0.661785 1.385846 22.894407 45.579523 0.135934 0.223249
USA 1988.147119 0.173695 0.659556 1.120407 22.863356 46.020542 0.083495 0.175254
In [102]:
ashrae_data.groupby("Country").median(numeric_only=True)
Out[102]:
Year Thermal sensation Clo Met Air temperature (C) Relative humidity (%) Air velocity (m/s) ThermalSensation_rounded
Country
Australia 1992.0 0.0 0.540 1.17 23.5 50.80 0.11 0.0
Brazil 2010.0 0.0 0.500 1.00 25.3 64.00 0.20 0.0
Canada 1994.0 0.0 0.700 1.20 23.3 34.10 0.08 0.0
France 1999.0 0.0 0.730 1.30 24.1 38.10 0.05 0.0
Germany 2005.0 0.0 0.560 1.30 24.4 48.20 0.53 0.0
Greece 1993.0 1.0 0.390 1.20 29.5 34.90 0.20 1.0
India 2012.0 0.0 0.680 1.10 26.2 48.00 0.10 0.0
Iran 1999.0 0.0 1.500 1.60 20.3 63.00 0.03 0.0
Italy 2008.0 0.0 0.620 1.20 25.7 30.40 0.09 0.0
Malaysia 2007.0 0.0 0.270 1.00 30.6 71.30 0.20 0.0
Philippines 2003.0 -1.0 0.650 1.30 23.7 46.20 0.13 -1.0
Portugal 1999.0 0.0 0.750 1.30 23.0 51.10 0.06 0.0
Singapore 1987.0 0.0 0.315 1.13 28.9 69.70 0.15 0.0
Sweden 1999.0 0.0 0.750 1.20 22.9 33.95 0.03 0.0
Thailand 1988.0 0.0 0.560 1.10 24.6 63.20 0.13 0.0
UK 1998.0 0.0 0.650 1.20 22.8 45.50 0.07 0.0
USA 1987.0 0.0 0.620 1.00 22.8 45.70 0.07 0.0
In [103]:
# total number of counts for each group
ashrae_data.groupby("Country").size()
Out[103]:
Country
Australia       6940
Brazil          7074
Canada           867
France           465
Germany          414
Greece          1283
India          15191
Iran             377
Italy            229
Malaysia         869
Philippines      277
Portugal        1558
Singapore        812
Sweden           940
Thailand        1146
UK              2056
USA             2950
dtype: int64

Plotting data¶

In [104]:
ashrae_data[['Air temperature (C)', 'Relative humidity (%)']].boxplot()
Out[104]:
<Axes: >
In [105]:
color = {'boxes': 'Blue', 'whiskers': 'purple',  'medians': 'DarkBlue', 'caps': 'Gray'}
ashrae_data[['Air temperature (C)','Relative humidity (%)']].plot.box(color=color, sym='r+', figsize=(10,5), vert=False)
Out[105]:
<Axes: >
In [106]:
ashrae_data[['Air temperature (C)','Relative humidity (%)','Country']].groupby('Country').boxplot(figsize=(20,20));
In [107]:
ashrae_data.hist(figsize=(10, 10));
In [108]:
# Density plots
ashrae_data['Air temperature (C)'].plot.kde()
Out[108]:
<Axes: ylabel='Density'>
In [109]:
ashrae_data.plot.scatter(x='Air temperature (C)', y='Relative humidity (%)', c='ThermalSensation_rounded', figsize=(15,10));

Parallel Coordinate Plots¶

Parallel coordinate plots are a multivariate visualization method for comparing several quantitative columns at the same time.

In [110]:
from pandas.plotting import parallel_coordinates
ashrae_data[['Air temperature (C)','Relative humidity (%)','ThermalSensation_rounded','Air velocity (m/s)']].iloc[:500].info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 500 entries, 2233 to 2732
Data columns (total 4 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Air temperature (C)       500 non-null    float64
 1   Relative humidity (%)     500 non-null    float64
 2   ThermalSensation_rounded  500 non-null    float64
 3   Air velocity (m/s)        500 non-null    float64
dtypes: float64(4)
memory usage: 19.5 KB
In [112]:
parallel_coordinates(ashrae_data[['Air temperature (C)','Relative humidity (%)','ThermalSensation_rounded','Air velocity (m/s)']].iloc[:500], 'ThermalSensation_rounded');

What conditions are comfortable for people?¶

A practical investigation of what conditions influence the comfort of occupants in the various contexts tested in the ASHRAE Thermal Comfort Database II studies.

We will go through and investigate how temperature, humidity, air movement, and other factors influence comfort.

In [113]:
ashrae_data = pd.read_csv("ashrae_thermal_comfort_database_2.csv", index_col='Unnamed: 0')
In [114]:
ashrae_data.head()
Out[114]:
Publication (Citation) Year Season Climate City Country Building type Cooling startegy_building level Sex Thermal sensation Clo Met Air temperature (C) Relative humidity (%) Air velocity (m/s) ThermalSensation_rounded
2233 Indraganti, Madhavi, et al. "Adaptive model of... 2012.0 Winter Tropical wet savanna Chennai India Office Mixed Mode Female -2.0 0.75 1.0 25.2 64.0 0.1 -2.0
2234 Indraganti, Madhavi, et al. "Adaptive model of... 2012.0 Winter Tropical wet savanna Chennai India Office Mixed Mode Male -1.0 0.64 1.0 25.2 64.0 0.1 -1.0
2235 Indraganti, Madhavi, et al. "Adaptive model of... 2012.0 Winter Tropical wet savanna Chennai India Office Mixed Mode Male -2.0 0.64 1.0 25.2 64.0 0.1 -2.0
2236 Indraganti, Madhavi, et al. "Adaptive model of... 2012.0 Winter Tropical wet savanna Chennai India Office Mixed Mode Female -2.0 0.75 1.0 25.2 64.0 0.1 -2.0
2237 Indraganti, Madhavi, et al. "Adaptive model of... 2012.0 Winter Tropical wet savanna Chennai India Office Mixed Mode Male 0.0 0.72 1.0 25.2 64.0 0.1 0.0

Is it air temperature, humidity, air movement, or something else?¶

Let's use box plots to investigate!

In [115]:
fig, ax = plt.subplots()
# enlarge the figure
fig.set_size_inches(10,10)
sns.boxplot(x="ThermalSensation_rounded", y="Air temperature (C)", data=ashrae_data)
Out[115]:
<Axes: xlabel='ThermalSensation_rounded', ylabel='Air temperature (C)'>
In [116]:
fig, ax = plt.subplots()
# enlarge the figure
fig.set_size_inches(10,10)
sns.boxplot(data=ashrae_data, x="ThermalSensation_rounded", y="Relative humidity (%)")
Out[116]:
<Axes: xlabel='ThermalSensation_rounded', ylabel='Relative humidity (%)'>
In [117]:
fig, ax = plt.subplots()
# enlarge the figure
fig.set_size_inches(10,10)
sns.boxplot(data=ashrae_data, x="ThermalSensation_rounded", y="Air velocity (m/s)")
Out[117]:
<Axes: xlabel='ThermalSensation_rounded', ylabel='Air velocity (m/s)'>

Do personal attributes have an impact?¶

In [118]:
fig, ax = plt.subplots()
# enlarge the figure
fig.set_size_inches(10,10)
sns.boxplot( data=ashrae_data, x="ThermalSensation_rounded", y="Air temperature (C)", hue="Sex")
Out[118]:
<Axes: xlabel='ThermalSensation_rounded', ylabel='Air temperature (C)'>
In [119]:
ashrae_data["Met_rounded"]= ashrae_data["Met"].round(0)
ashrae_data["Clo_rounded"]= ashrae_data["Clo"].round(0)
In [120]:
fig, ax = plt.subplots()
# enlarge the figure
fig.set_size_inches(10,10)
sns.boxplot(data=ashrae_data, x="ThermalSensation_rounded", y="Air temperature (C)", hue="Met_rounded")
Out[120]:
<Axes: xlabel='ThermalSensation_rounded', ylabel='Air temperature (C)'>

How does building type or its systems influence people's comfort?¶

In [121]:
fig, ax = plt.subplots()
# enlarge the figure
fig.set_size_inches(10,10)
sns.boxplot(y="Building type", x="Thermal sensation",
             palette="Pastel1",
            data=ashrae_data[["Building type","Thermal sensation","Sex"]].dropna())

sns.despine(offset=10, trim=True)
In [122]:
fig, ax = plt.subplots()
# enlarge the figure
fig.set_size_inches(10,10)
sns.boxplot(y="Cooling startegy_building level", x="Thermal sensation",
             palette="Pastel1",
            data=ashrae_data[["Cooling startegy_building level","Thermal sensation"]].dropna())

sns.despine(offset=10, trim=True)
In [123]:
fig, ax = plt.subplots()
# enlarge the figure
fig.set_size_inches(10,10)
sns.boxplot(y="Season", x="Thermal sensation",
             palette="Pastel1",
            data=ashrae_data[["Season","Thermal sensation"]].dropna())

sns.despine(offset=10, trim=True)

How do people from different countries perceive comfort?¶

In [124]:
fig, ax = plt.subplots()
# enlarge the figure
fig.set_size_inches(15,20)
sns.boxplot(y="Country", x="Air temperature (C)",
            hue="ThermalSensation_rounded", palette="Set3",
            data=ashrae_data[["Country","Air temperature (C)","ThermalSensation_rounded"]].dropna())
plt.title("Thermal Sensation")
sns.despine(offset=10, trim=True)

Machine Learning Model¶

Unsupervised Learning using Clustering and Supervised Prediction using Regression

Building Data Genome Project Data Set for Clustering and Regression Prediction¶

In [125]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
from sklearn import metrics
from sklearn.neighbors import KNeighborsRegressor
from scipy.cluster.vq import kmeans, vq, whiten
from scipy.spatial.distance import cdist
from datetime import datetime
In [126]:
df = pd.read_csv('./meter_data/Office_Amelia.csv', index_col = "timestamp", parse_dates=True) 
In [127]:
df.head()
Out[127]:
Office_Amelia
timestamp
2015-01-01 00:00:00 3.96
2015-01-01 01:00:00 4.44
2015-01-01 02:00:00 4.82
2015-01-01 03:00:00 4.28
2015-01-01 04:00:00 18.79
In [128]:
df.plot(alpha=0.5, figsize=(15, 5))
plt.title("Electricity Consumption")
plt.xlabel("Time Range")
plt.ylabel("kWh Electricity Consumption Visualization");
In [129]:
df.index[0]
Out[129]:
Timestamp('2015-01-01 00:00:00')
In [130]:
df.truncate(before='2015-02-01', after='2015-02-14').plot(figsize=(15,5))
plt.title("Electricity Consumption")
plt.xlabel("Time Range")
plt.ylabel("kWh Electricity Consumption Visualization");

Daily Profile Analysis - Weekday vs. Weekend¶

In [131]:
df['Date'] = df.index.map(lambda t: t.date())
df['Time'] = df.index.map(lambda t: t.time())
In [132]:
df.head()
Out[132]:
Office_Amelia Date Time
timestamp
2015-01-01 00:00:00 3.96 2015-01-01 00:00:00
2015-01-01 01:00:00 4.44 2015-01-01 01:00:00
2015-01-01 02:00:00 4.82 2015-01-01 02:00:00
2015-01-01 03:00:00 4.28 2015-01-01 03:00:00
2015-01-01 04:00:00 18.79 2015-01-01 04:00:00
In [133]:
df_pivot = pd.pivot_table(df, values='Office_Amelia', index='Date', columns='Time')
df_pivot.head()
Out[133]:
Time 00:00:00 01:00:00 02:00:00 03:00:00 04:00:00 05:00:00 06:00:00 07:00:00 08:00:00 09:00:00 ... 14:00:00 15:00:00 16:00:00 17:00:00 18:00:00 19:00:00 20:00:00 21:00:00 22:00:00 23:00:00
Date
2015-01-01 3.96 4.44 4.82 4.28 18.79 24.16 22.65 17.08 12.46 13.24 ... 3.72 4.82 6.15 5.47 4.97 5.93 6.46 4.67 4.50 3.98
2015-01-02 4.55 4.79 4.03 4.71 19.15 23.07 23.19 21.89 16.67 9.60 ... 3.76 3.26 3.83 4.47 3.81 3.58 3.79 4.09 4.22 4.22
2015-01-03 4.46 4.20 4.14 4.70 4.49 4.20 5.07 4.88 3.71 2.93 ... 2.38 2.51 2.72 3.06 4.03 3.78 3.80 4.17 4.22 4.35
2015-01-04 4.50 4.47 3.95 4.61 4.48 4.34 5.15 6.26 3.69 2.97 ... 2.40 2.39 2.60 2.90 3.47 3.50 4.24 4.01 3.99 3.73
2015-01-05 4.06 3.93 4.08 4.66 20.83 20.00 16.80 16.64 15.47 11.35 ... 11.34 11.08 8.79 3.27 3.78 3.56 3.84 4.00 3.79 3.73

5 rows × 24 columns

In [134]:
df_pivot.T.plot(legend=False, figsize=(15,5), color='r', alpha=0.1, xticks=np.arange(0, 86400, 10800))
plt.title("Electrical Meter Data - Daily Profiles")
plt.xlabel("Daily Time Frame")
plt.ylabel("kWh Electricity");

Let's look at weekdays first:

In [135]:
test = df.index[0].date()
test.weekday()
Out[135]:
3
In [136]:
df['Weekday'] = df.index.map(lambda t: t.date().weekday())
In [137]:
df.head()
Out[137]:
Office_Amelia Date Time Weekday
timestamp
2015-01-01 00:00:00 3.96 2015-01-01 00:00:00 3
2015-01-01 01:00:00 4.44 2015-01-01 01:00:00 3
2015-01-01 02:00:00 4.82 2015-01-01 02:00:00 3
2015-01-01 03:00:00 4.28 2015-01-01 03:00:00 3
2015-01-01 04:00:00 18.79 2015-01-01 04:00:00 3
In [138]:
df_pivot_weekday = pd.pivot_table(df[(df.Weekday < 5)], values='Office_Amelia', index='Date', columns='Time')
In [139]:
df_pivot_weekday.T.plot(legend=False, figsize=(15,5), color='k', alpha=0.1, xticks=np.arange(0, 86400, 10800))
plt.title("Electrical Meter Data - Weekday Daily Profiles")
plt.xlabel("Daily Time Frame")
plt.ylabel("kWh Electricity");

Manual identification of clusters¶

There are varying levels of consumption over the course of a year, probably because of weather effects or schedule changes. These could be considered "clusters" of behavior.

Let's try the weekend:

In [140]:
df_pivot_weekend = pd.pivot_table(df[(df.Weekday >= 5)], values='Office_Amelia', index='Date', columns='Time')
df_pivot_weekend.T.plot(legend=False, figsize=(15,5), color='k', alpha=0.1, xticks=np.arange(0, 86400, 10800))
plt.title("Electrical Meter Data - Weekend Daily Profiles")
plt.xlabel("Daily Time Frame")
plt.ylabel("kWh Electricity");

K-means clustering - daily load profile¶

In [141]:
df = pd.read_csv('./meter_data/Office_Amelia.csv', index_col = "timestamp", parse_dates=True) 
In [142]:
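# normalize the load (mean-center, scale by the range) so clustering compares profile shape rather than magnitude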
df_norm = (df - df.mean()) / (df.max() - df.min()) 

df['Time'] = df.index.map(lambda t: t.time())
df['Date'] = df.index.map(lambda t: t.date())
df_norm['Time'] = df_norm.index.map(lambda t: t.time())
df_norm['Date'] = df_norm.index.map(lambda t: t.date())
In [143]:
dailyblocks = pd.pivot_table(df, values='Office_Amelia', index='Date', columns='Time', aggfunc='mean')
dailyblocks_norm = pd.pivot_table(df_norm, values='Office_Amelia', index='Date', columns='Time', aggfunc='mean')

Clustering Model¶

In [144]:
# stack the normalized daily profiles into a (days x 24 hours) array
dailyblocksmatrix_norm = np.asarray(dailyblocks_norm.dropna())
# k-means with 4 clusters, then vq assigns each day to its nearest centroid
centers, _ = kmeans(dailyblocksmatrix_norm, 4, iter=10000)
cluster, _ = vq(dailyblocksmatrix_norm, centers)
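The choice of 4 clusters above is a judgment call. As a quick sanity check (a minimal sketch, not part of the original analysis), scipy's kmeans also returns the mean distortion, so we can scan several values of k and look for an "elbow":

distortions = []
for k in range(1, 10):
    _, distortion = kmeans(dailyblocksmatrix_norm, k, iter=100)
    distortions.append(distortion)  # mean distance of each day to its nearest centroid
plt.plot(range(1, 10), distortions, marker='o')
plt.xlabel('Number of clusters k')
plt.ylabel('Mean distortion');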
In [145]:
clusterdf = pd.DataFrame(cluster, columns=['ClusterNo'])
In [146]:
dailyclusters = pd.concat([dailyblocks.dropna().reset_index(), clusterdf], axis=1) 
In [147]:
dailyclusters.head()
Out[147]:
Date 00:00:00 01:00:00 02:00:00 03:00:00 04:00:00 05:00:00 06:00:00 07:00:00 08:00:00 ... 15:00:00 16:00:00 17:00:00 18:00:00 19:00:00 20:00:00 21:00:00 22:00:00 23:00:00 ClusterNo
0 2015-01-01 3.96 4.44 4.82 4.28 18.79 24.16 22.65 17.08 12.46 ... 4.82 6.15 5.47 4.97 5.93 6.46 4.67 4.50 3.98 3
1 2015-01-02 4.55 4.79 4.03 4.71 19.15 23.07 23.19 21.89 16.67 ... 3.26 3.83 4.47 3.81 3.58 3.79 4.09 4.22 4.22 3
2 2015-01-03 4.46 4.20 4.14 4.70 4.49 4.20 5.07 4.88 3.71 ... 2.51 2.72 3.06 4.03 3.78 3.80 4.17 4.22 4.35 0
3 2015-01-04 4.50 4.47 3.95 4.61 4.48 4.34 5.15 6.26 3.69 ... 2.39 2.60 2.90 3.47 3.50 4.24 4.01 3.99 3.73 0
4 2015-01-05 4.06 3.93 4.08 4.66 20.83 20.00 16.80 16.64 15.47 ... 11.08 8.79 3.27 3.78 3.56 3.84 4.00 3.79 3.73 3

5 rows × 26 columns

In [148]:
x = dailyclusters.groupby('ClusterNo').mean(numeric_only=True).sum(axis=1).sort_values()
x = pd.DataFrame(x.reset_index())
x['ClusterNo2'] = x.index
x = x.set_index('ClusterNo')
x = x.drop([0], axis=1)
dailyclusters = dailyclusters.merge(x, how='outer', left_on='ClusterNo', right_index=True)
In [149]:
dailyclusters = dailyclusters.drop(['ClusterNo'],axis=1)
dailyclusters = dailyclusters.set_index(['ClusterNo2','Date']).T.sort_index()
In [150]:
dailyclusters.head()
Out[150]:
ClusterNo2 1 ... 3
Date 2015-01-01 2015-01-02 2015-01-05 2015-01-06 2015-01-07 2015-01-08 2015-01-09 2015-01-12 2015-01-13 2015-01-14 ... 2015-09-17 2015-09-21 2015-09-22 2015-09-23 2015-09-24 2015-09-25 2015-09-28 2015-09-29 2015-09-30 2015-10-01
00:00:00 3.96 4.55 4.06 3.25 4.61 4.42 4.62 4.05 4.59 4.12 ... 4.09 4.59 5.11 4.71 4.93 5.63 4.57 5.60 5.90 4.89
01:00:00 4.44 4.79 3.93 3.65 4.31 5.01 4.95 4.71 4.60 4.48 ... 4.97 4.88 10.58 4.74 5.50 5.72 5.20 5.38 5.26 5.56
02:00:00 4.82 4.03 4.08 4.04 3.98 4.18 4.98 4.48 4.71 4.69 ... 6.18 5.75 7.18 4.31 4.44 5.07 4.18 4.79 5.20 5.57
03:00:00 4.28 4.71 4.66 4.20 4.99 5.67 5.10 7.35 6.75 9.79 ... 5.14 4.99 4.78 4.26 4.90 5.64 4.68 5.36 5.21 4.51
04:00:00 18.79 19.15 20.83 15.90 13.89 6.63 13.67 9.57 7.78 12.37 ... 9.50 19.93 10.67 8.44 10.04 11.30 17.25 10.35 10.88 9.69

5 rows × 365 columns

In [151]:
clusterlist = list(dailyclusters.columns.get_level_values(0).unique())
matplotlib.rcParams['figure.figsize'] = 20, 7

styles2 = ['LightSkyBlue', 'b','LightGreen', 'g','LightCoral','r','SandyBrown','Orange','Plum','Purple','Gold','b']
fig, ax = plt.subplots()
for col, style in zip(clusterlist, styles2):
    dailyclusters[col].plot(ax=ax, legend=False, style=style, alpha=0.1, xticks=np.arange(0, 86400, 10800))

ax.set_ylabel('Total Daily Profile')
ax.set_xlabel('Time of Day');

Aggregate visualizations of the clusters¶

In [152]:
def timestampcombine(date,time):
    pydatetime = datetime.combine(date, time)
    return pydatetime
In [153]:
def ClusterUnstacker(df):
    df = df.unstack().reset_index()
    df['timestampstring'] = pd.to_datetime(df.Date.astype("str") + " " + df.level_2.astype("str"))
    df = df.dropna()
    return df
In [154]:
dailyclusters.unstack().reset_index().head()
Out[154]:
ClusterNo2 Date level_2 0
0 1 2015-01-01 00:00:00 3.96
1 1 2015-01-01 01:00:00 4.44
2 1 2015-01-01 02:00:00 4.82
3 1 2015-01-01 03:00:00 4.28
4 1 2015-01-01 04:00:00 18.79
In [155]:
dfclusterunstacked = ClusterUnstacker(dailyclusters)
dfclusterunstackedpivoted = pd.pivot_table(dfclusterunstacked, values=0, index='timestampstring', columns='ClusterNo2')
In [156]:
clusteravgplot = dfclusterunstackedpivoted.resample('D').sum().replace(0, np.nan).plot(style="^",markersize=15)
clusteravgplot.set_ylabel('Daily Totals kWh')
clusteravgplot.set_xlabel('Date');

Electricity Prediction using Regression¶

In [158]:
df_prediction_data = pd.read_csv("./meter_data/UnivClass_Ciara.csv", parse_dates=True, index_col='timestamp')
In [159]:
df_prediction_data.head()
Out[159]:
UnivClass_Ciara
timestamp
2015-01-01 00:00:00 136.824997
2015-01-01 01:00:00 144.025002
2015-01-01 02:00:00 144.875000
2015-01-01 03:00:00 142.375000
2015-01-01 04:00:00 148.199997
In [160]:
df_prediction_data.plot()
Out[160]:
<Axes: xlabel='timestamp'>
In [162]:
weather_data = pd.read_csv("./weather_data/weather2.csv", index_col='timestamp', parse_dates=True)
In [165]:
weather_data.head()
Out[165]:
Conditions DateUTC<br /> Dew PointC Events Gust SpeedKm/h Humidity Precipitationmm Sea Level PressurehPa TemperatureC TimeEDT TimeEST VisibilityKm Wind Direction Wind SpeedKm/h WindDirDegrees timestamp.1
timestamp
2015-01-01 00:56:00 Overcast 2015-01-01 05:56:00<br /> -15.0 NaN - 54.0 NaN 1017.4 -7.2 NaN 12:56 AM 16.1 SW 18.5 230 2015-01-01 00:56:00
2015-01-01 01:56:00 Overcast 2015-01-01 06:56:00<br /> -14.4 NaN - 55.0 NaN 1017.2 -6.6 NaN 1:56 AM 16.1 WSW 14.8 240 2015-01-01 01:56:00
2015-01-01 02:56:00 Overcast 2015-01-01 07:56:00<br /> -14.4 NaN - 55.0 NaN 1016.9 -6.6 NaN 2:56 AM 16.1 SW 14.8 220 2015-01-01 02:56:00
2015-01-01 03:56:00 Overcast 2015-01-01 08:56:00<br /> -14.4 NaN - 55.0 NaN 1017.2 -6.6 NaN 3:56 AM 16.1 SW 18.5 220 2015-01-01 03:56:00
2015-01-01 04:56:00 Overcast 2015-01-01 09:56:00<br /> -14.4 NaN - 55.0 NaN 1017.2 -6.6 NaN 4:56 AM 16.1 SSW 14.8 210 2015-01-01 04:56:00
In [170]:
weather_hourly = weather_data.resample('H').mean(numeric_only=True)
weather_hourly_nooutlier = weather_hourly[weather_hourly > -40]
weather_hourly_nooutlier_nogaps = weather_hourly_nooutlier.fillna(method='ffill')
In [171]:
temperature = weather_hourly_nooutlier_nogaps['TemperatureC']
temperature.plot()
Out[171]:
<Axes: xlabel='timestamp'>

Create Train and Test Datasets for Supervised Learning¶

We will use 3 months of data from April, May, and June to predict July.

In [172]:
training_months = [4,5,6]
test_months = [7]
In [174]:
trainingdata = df_prediction_data[df_prediction_data.index.month.isin(training_months)]
testdata = df_prediction_data[df_prediction_data.index.month.isin(test_months)]
In [175]:
trainingdata.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2184 entries, 2015-04-01 00:00:00 to 2015-06-30 23:00:00
Data columns (total 1 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   UnivClass_Ciara  2184 non-null   float64
dtypes: float64(1)
memory usage: 34.1 KB
In [176]:
testdata.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 744 entries, 2015-07-01 00:00:00 to 2015-07-31 23:00:00
Data columns (total 1 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   UnivClass_Ciara  744 non-null    float64
dtypes: float64(1)
memory usage: 11.6 KB

Encoding Categorical Variables¶

In [197]:
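# one-hot encode hour-of-day (24 columns) and day-of-week (7 columns), then append outdoor temperature as a numeric column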
train_features = pd.concat([pd.get_dummies(trainingdata.index.hour), 
                                     pd.get_dummies(trainingdata.index.dayofweek), 
                                     pd.DataFrame(temperature[temperature.index.month.isin(training_months)].values)], axis=1).dropna()
In [198]:
train_features.head()
Out[198]:
0 1 2 3 4 5 6 7 8 9 ... 22 23 0 1 2 3 4 5 6 0
0 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 -5.5
1 0 1 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 -6.1
2 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 -7.2
3 0 0 0 1 0 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 -6.6
4 0 0 0 0 1 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 -7.2

5 rows × 32 columns

In [203]:
print(train_features.shape, trainingdata.shape)
(2184, 32) (2184, 1)
In [204]:
model = KNeighborsRegressor()
model.fit(np.array(train_features), np.array(trainingdata.values))
Out[204]:
KNeighborsRegressor()
In [205]:
test_features = np.array(pd.concat([pd.get_dummies(testdata.index.hour),
                                    pd.get_dummies(testdata.index.dayofweek),
                                    pd.DataFrame(temperature[temperature.index.month.isin(test_months)].values)], axis=1).dropna())
In [206]:
# predictions
preds = model.predict(test_features)
In [222]:
predicted_vs_actual = pd.concat([testdata, pd.DataFrame(preds, index=testdata.index)], axis=1)
In [223]:
predicted_vs_actual = predicted_vs_actual.rename(columns={"UnivClass_Ciara":"Actual", 0: "Predicted"})
In [224]:
predicted_vs_actual.plot()
Out[224]:
<Axes: xlabel='timestamp'>
In [225]:
trainingdata = trainingdata.rename(columns={"UnivClass_Ciara":"Actual"})
trainingdata.head()
Out[225]:
Actual
timestamp
2015-04-01 00:00:00 145.500000
2015-04-01 01:00:00 141.775002
2015-04-01 02:00:00 158.499996
2015-04-01 03:00:00 142.849998
2015-04-01 04:00:00 145.550003
In [226]:
predicted_vs_actual_training = pd.concat([trainingdata, predicted_vs_actual], sort=True)
In [227]:
predicted_vs_actual_training.plot()
Out[227]:
<Axes: xlabel='timestamp'>

Evaluation Metrics¶

MAPE vs. MSE¶

https://stats.stackexchange.com/questions/11636/the-difference-between-mse-and-mape#:~:text=MSE%20is%20scale%2Ddependent%2C%20MAPE,when%20percentages%20make%20no%20sense.

In [230]:
error = abs(predicted_vs_actual['Actual'] - predicted_vs_actual['Predicted'])
MAPE = sklearn.metrics.mean_absolute_percentage_error(predicted_vs_actual['Actual'], predicted_vs_actual['Predicted'])
MAPE * 100
Out[230]:
6.954645494388625
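For reference, MAPE is the mean of |actual - predicted| / |actual|, so the sklearn result above can be reproduced by hand (a minimal sketch):

mape_manual = (abs(predicted_vs_actual['Actual'] - predicted_vs_actual['Predicted'])
               / abs(predicted_vs_actual['Actual'])).mean()
mape_manual * 100   # matches the sklearn value above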

Supervised Classification Models¶

In [233]:
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_curve, auc, precision_recall_curve
import joblib
from sklearn.model_selection import RandomizedSearchCV
from sklearn.feature_selection import SelectKBest

Load the IEQ (Indoor Environmental Quality) Data and find a classification objective¶

In [234]:
ieq_data = pd.read_csv("./ashrae_thermal_comfort_database_2.csv", index_col='Unnamed: 0')
In [235]:
ieq_data.head()
Out[235]:
Publication (Citation) Year Season Climate City Country Building type Cooling startegy_building level Sex Thermal sensation Clo Met Air temperature (C) Relative humidity (%) Air velocity (m/s) ThermalSensation_rounded
2233 Indraganti, Madhavi, et al. "Adaptive model of... 2012.0 Winter Tropical wet savanna Chennai India Office Mixed Mode Female -2.0 0.75 1.0 25.2 64.0 0.1 -2.0
2234 Indraganti, Madhavi, et al. "Adaptive model of... 2012.0 Winter Tropical wet savanna Chennai India Office Mixed Mode Male -1.0 0.64 1.0 25.2 64.0 0.1 -1.0
2235 Indraganti, Madhavi, et al. "Adaptive model of... 2012.0 Winter Tropical wet savanna Chennai India Office Mixed Mode Male -2.0 0.64 1.0 25.2 64.0 0.1 -2.0
2236 Indraganti, Madhavi, et al. "Adaptive model of... 2012.0 Winter Tropical wet savanna Chennai India Office Mixed Mode Female -2.0 0.75 1.0 25.2 64.0 0.1 -2.0
2237 Indraganti, Madhavi, et al. "Adaptive model of... 2012.0 Winter Tropical wet savanna Chennai India Office Mixed Mode Male 0.0 0.72 1.0 25.2 64.0 0.1 0.0
In [236]:
ieq_data.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 43448 entries, 2233 to 104033
Data columns (total 16 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Publication (Citation)           43448 non-null  object 
 1   Year                             43448 non-null  float64
 2   Season                           43448 non-null  object 
 3   Climate                          43448 non-null  object 
 4   City                             43448 non-null  object 
 5   Country                          43448 non-null  object 
 6   Building type                    43448 non-null  object 
 7   Cooling startegy_building level  43448 non-null  object 
 8   Sex                              43448 non-null  object 
 9   Thermal sensation                43448 non-null  float64
 10  Clo                              43448 non-null  float64
 11  Met                              43448 non-null  float64
 12  Air temperature (C)              43448 non-null  float64
 13  Relative humidity (%)            43448 non-null  float64
 14  Air velocity (m/s)               43448 non-null  float64
 15  ThermalSensation_rounded         43448 non-null  float64
dtypes: float64(8), object(8)
memory usage: 5.6+ MB
In [237]:
ieq_data["ThermalSensation_rounded"].value_counts()
Out[237]:
 0.0    19537
 1.0     8396
-1.0     7693
 2.0     3657
-2.0     2459
 3.0     1265
-3.0      441
Name: ThermalSensation_rounded, dtype: int64
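
The classes are heavily imbalanced: the neutral vote (0.0) accounts for roughly 45% of the 43,448 records, while the extremes (-3.0 and 3.0) are rare. A quick way to see the proportions, and the accuracy a trivial majority-class guesser would achieve, is sketched below:

In [ ]:
# Class shares; the largest share equals the accuracy of always predicting
# the majority class ("neutral" = 0.0).
proportions = ieq_data["ThermalSensation_rounded"].value_counts(normalize=True)
print(proportions)
print(f"Majority-class baseline accuracy: {proportions.max():.2%}")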

Predict Thermal Sensation using a Random Forest Model¶

In [239]:
list(ieq_data.columns)
Out[239]:
['Publication (Citation)',
 'Year',
 'Season',
 'Climate',
 'City',
 'Country',
 'Building type',
 'Cooling startegy_building level',
 'Sex',
 'Thermal sensation',
 'Clo',
 'Met',
 'Air temperature (C)',
 'Relative humidity (%)',
 'Air velocity (m/s)',
 'ThermalSensation_rounded']

These features will be used by the model to predict ThermalSensation_rounded.

The features cover the building context (e.g. Country, City), the environmental conditions (e.g. Air temperature (C), Relative humidity (%)), and personal factors (e.g. Sex, Clo, Met).

In [240]:
feature_columns = [
 'Year',
 'Season',
 'Climate',
 'City',
 'Country',
 'Building type',
 'Cooling startegy_building level',
 'Sex',
 'Clo',
 'Met',
 'Air temperature (C)',
 'Relative humidity (%)',
 'Air velocity (m/s)']
In [241]:
features = ieq_data[feature_columns]
features.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 43448 entries, 2233 to 104033
Data columns (total 13 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Year                             43448 non-null  float64
 1   Season                           43448 non-null  object 
 2   Climate                          43448 non-null  object 
 3   City                             43448 non-null  object 
 4   Country                          43448 non-null  object 
 5   Building type                    43448 non-null  object 
 6   Cooling startegy_building level  43448 non-null  object 
 7   Sex                              43448 non-null  object 
 8   Clo                              43448 non-null  float64
 9   Met                              43448 non-null  float64
 10  Air temperature (C)              43448 non-null  float64
 11  Relative humidity (%)            43448 non-null  float64
 12  Air velocity (m/s)               43448 non-null  float64
dtypes: float64(6), object(7)
memory usage: 4.6+ MB
In [242]:
target = ieq_data['ThermalSensation_rounded']
target.head()
Out[242]:
2233   -2.0
2234   -1.0
2235   -2.0
2236   -2.0
2237    0.0
Name: ThermalSensation_rounded, dtype: float64
In [243]:
features.head()
Out[243]:
Year Season Climate City Country Building type Cooling startegy_building level Sex Clo Met Air temperature (C) Relative humidity (%) Air velocity (m/s)
2233 2012.0 Winter Tropical wet savanna Chennai India Office Mixed Mode Female 0.75 1.0 25.2 64.0 0.1
2234 2012.0 Winter Tropical wet savanna Chennai India Office Mixed Mode Male 0.64 1.0 25.2 64.0 0.1
2235 2012.0 Winter Tropical wet savanna Chennai India Office Mixed Mode Male 0.64 1.0 25.2 64.0 0.1
2236 2012.0 Winter Tropical wet savanna Chennai India Office Mixed Mode Female 0.75 1.0 25.2 64.0 0.1
2237 2012.0 Winter Tropical wet savanna Chennai India Office Mixed Mode Male 0.72 1.0 25.2 64.0 0.1
In [244]:
features_withdummies = pd.get_dummies(features)
features_withdummies.head()
Out[244]:
Year Clo Met Air temperature (C) Relative humidity (%) Air velocity (m/s) Season_Autumn Season_Spring Season_Summer Season_Winter ... Building type_Multifamily housing Building type_Office Building type_Others Building type_Senior center Cooling startegy_building level_Air Conditioned Cooling startegy_building level_Mechanically Ventilated Cooling startegy_building level_Mixed Mode Cooling startegy_building level_Naturally Ventilated Sex_Female Sex_Male
2233 2012.0 0.75 1.0 25.2 64.0 0.1 0 0 0 1 ... 0 1 0 0 0 0 1 0 1 0
2234 2012.0 0.64 1.0 25.2 64.0 0.1 0 0 0 1 ... 0 1 0 0 0 0 1 0 0 1
2235 2012.0 0.64 1.0 25.2 64.0 0.1 0 0 0 1 ... 0 1 0 0 0 0 1 0 0 1
2236 2012.0 0.75 1.0 25.2 64.0 0.1 0 0 0 1 ... 0 1 0 0 0 0 1 0 1 0
2237 2012.0 0.72 1.0 25.2 64.0 0.1 0 0 0 1 ... 0 1 0 0 0 0 1 0 0 1

5 rows × 102 columns
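
The 13 input columns expand to 102 because pd.get_dummies replaces every categorical (object) column with one 0/1 indicator column per unique value, while the six numeric columns pass through unchanged. A quick sanity check of that count (a sketch):

In [ ]:
# 6 numeric columns + one dummy per distinct category across the 7 object columns
numeric_cols = features.select_dtypes(exclude="object").shape[1]
dummy_cols = features.select_dtypes(include="object").nunique().sum()
print(numeric_cols + dummy_cols)  # should match features_withdummies.shape[1]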

Create the Train and Test Split using the Scikit-Learn Library¶

In [245]:
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.33, random_state=42)
features_train, features_test, target_train, target_test = train_test_split(features_withdummies, target, test_size=0.3, random_state=2)
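
Given the class imbalance noted earlier, it can be worth passing stratify=target so the 30% test split preserves the class proportions. A sketch of that optional variant (not what was run above):

In [ ]:
# Stratified variant: each thermal-sensation class appears in train and test
# in roughly the same proportion as in the full dataset.
features_train_s, features_test_s, target_train_s, target_test_s = train_test_split(
    features_withdummies, target, test_size=0.3, random_state=2, stratify=target)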
In [246]:
# max_features='sqrt' keeps the behaviour of the deprecated 'auto' setting
rf_model = RandomForestClassifier(n_estimators=100, min_samples_leaf=2, random_state=2, max_features='sqrt', oob_score=True)
In [247]:
rf_model
Out[247]:
RandomForestClassifier(min_samples_leaf=2, oob_score=True, random_state=2)
In [248]:
rf_model.fit(features_train, target_train)
Out[248]:
RandomForestClassifier(min_samples_leaf=2, oob_score=True, random_state=2)
In [250]:
mean_model_accuracy = rf_model.oob_score_

print("Model accuracy: "+str(mean_model_accuracy))
Model accuracy: 0.4872587380396541
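
oob_score_ is the accuracy measured on out-of-bag samples, i.e. the training rows each tree never saw in its bootstrap draw, so it approximates held-out accuracy without touching the test set. A quick cross-check against the actual test split (a sketch):

In [ ]:
# Held-out test accuracy; expected to land close to the OOB estimate above.
print(f"OOB accuracy:  {rf_model.oob_score_:.4f}")
print(f"Test accuracy: {rf_model.score(features_test, target_test):.4f}")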

Create a Baseline Model to Compare the Model's Accuracy Against¶

In [251]:
# Dummy classifier as a baseline: 'stratified' guesses at random according
# to the training class proportions.
baseline_rf = DummyClassifier(strategy='stratified', random_state=0)
baseline_rf.fit(features_train, target_train)
# An alternative baseline: DummyClassifier(strategy='most_frequent')
baseline_model_accuracy = baseline_rf.score(features_test, target_test)
print("Model accuracy: "+str(baseline_model_accuracy))
Model accuracy: 0.2833908707326429
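
The 'stratified' baseline lands at about 28%. A 'most_frequent' baseline would score about 45% (the share of the neutral class), which is the more demanding bar for this imbalanced dataset. A sketch comparing the two:

In [ ]:
# Compare both trivial baselines on the same test split.
for strategy in ["stratified", "most_frequent"]:
    dummy = DummyClassifier(strategy=strategy, random_state=0)
    dummy.fit(features_train, target_train)
    print(strategy, dummy.score(features_test, target_test))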

Classification Report¶

In [252]:
y_pred = rf_model.predict(features_test)
y_true = np.array(target_test)
print(classification_report(y_true, y_pred))
              precision    recall  f1-score   support

        -3.0       0.50      0.03      0.06       135
        -2.0       0.34      0.09      0.15       791
        -1.0       0.37      0.29      0.33      2252
         0.0       0.54      0.79      0.64      5863
         1.0       0.40      0.27      0.33      2499
         2.0       0.39      0.24      0.30      1079
         3.0       0.47      0.19      0.27       416

    accuracy                           0.49     13035
   macro avg       0.43      0.27      0.30     13035
weighted avg       0.46      0.49      0.45     13035
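
Because the classes are imbalanced, the headline accuracy of 0.49 flatters the model: recall collapses on the rare extreme classes (0.03 for -3.0). Balanced accuracy, which is exactly the macro-averaged recall (0.27 above), makes this explicit; a sketch:

In [ ]:
from sklearn.metrics import balanced_accuracy_score

# Macro-averaged recall: every class contributes equally, regardless of size.
print(balanced_accuracy_score(y_true, y_pred))  # ~0.27, per the report above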

Feature Importance¶

In [261]:
importances = rf_model.feature_importances_
# std = np.std([tree.feature_importances_ for tree in rf_model.estimators_], axis=0)
indices = np.argsort(importances)[::-1]
In [262]:
# Print the feature ranking
print("Feature ranking:")

for f in range(features_withdummies.shape[1]):
    print("%d. feature %s (%f)" % (f + 1, features_withdummies.columns[indices[f]], importances[indices[f]]))
Feature ranking:
1. feature Air temperature (C) (0.227244)
2. feature Relative humidity (%) (0.192509)
3. feature Air velocity (m/s) (0.153570)
4. feature Clo (0.148443)
5. feature Met (0.069358)
6. feature Year (0.018625)
7. feature Sex_Female (0.016522)
8. feature Sex_Male (0.016083)
9. feature Season_Summer (0.011295)
10. feature Season_Winter (0.009701)
11. feature Season_Spring (0.007988)
12. feature Season_Autumn (0.007010)
13. feature Cooling startegy_building level_Air Conditioned (0.006721)
14. feature Cooling startegy_building level_Naturally Ventilated (0.006303)
15. feature Cooling startegy_building level_Mixed Mode (0.006215)
16. feature Climate_Tropical wet savanna (0.005513)
17. feature Building type_Office (0.004602)
18. feature Country_India (0.004321)
19. feature Climate_Hot semi-arid (0.004083)
20. feature City_Chennai (0.003510)
21. feature Climate_Subtropical highland (0.003073)
22. feature Building type_Others (0.002585)
23. feature City_Shimla (0.002564)
24. feature Building type_Classroom (0.002368)
25. feature Country_Australia (0.002255)
26. feature Country_UK (0.002091)
27. feature Climate_Humid subtropical (0.002077)
28. feature City_London (0.002061)
29. feature City_Jaipur (0.001996)
30. feature Building type_Multifamily housing (0.001977)
31. feature Country_Brazil (0.001842)
32. feature Country_Greece (0.001774)
33. feature Climate_Temperate oceanic (0.001710)
34. feature City_Delhi (0.001704)
35. feature City_Athens (0.001687)
36. feature Climate_Warm-summer humid continental (0.001645)
37. feature City_Hyderabad (0.001636)
38. feature Country_Sweden (0.001635)
39. feature City_Florianopolis (0.001520)
40. feature Country_Portugal (0.001516)
41. feature Climate_Monsoon-influenced humid subtropical (0.001414)
42. feature City_San Francisco (0.001406)
43. feature City_Ahmedabad (0.001362)
44. feature City_Darwin (0.001342)
45. feature Country_Philippines (0.001291)
46. feature City_Bangalore (0.001210)
47. feature Climate_Hot-summer mediterranean (0.001183)
48. feature City_Townsville (0.001179)
49. feature Climate_Cool-summer mediterranean (0.001141)
50. feature Country_USA (0.001107)
51. feature Building type_Senior center (0.001062)
52. feature City_Montreal (0.001049)
53. feature Climate_Oceanic (0.000989)
54. feature City_Melbourne (0.000988)
55. feature City_Makati (0.000975)
56. feature Climate_Hot-summer Mediterranean (0.000926)
57. feature City_San Ramon (0.000912)
58. feature Climate_Warm-summer Mediterranean (0.000907)
59. feature City_Brisbane (0.000851)
60. feature City_Porto (0.000834)
61. feature City_Lyon (0.000831)
62. feature City_Sydney (0.000796)
63. feature Climate_Tropical rainforest (0.000766)
64. feature Country_Canada (0.000766)
65. feature City_Oxford (0.000756)
66. feature City_Bangkok (0.000738)
67. feature City_Cardiff (0.000731)
68. feature Country_Thailand (0.000713)
69. feature Country_France (0.000702)
70. feature City_Walnut Creek (0.000702)
71. feature City_Karlsruhe (0.000660)
72. feature City_Gothenburg (0.000642)
73. feature City_Ilam (0.000635)
74. feature City_Maceio (0.000630)
75. feature Country_Iran (0.000598)
76. feature City_Palo Alto (0.000585)
77. feature Climate_Tropical monsoon (0.000573)
78. feature Country_Germany (0.000556)
79. feature City_Singapore (0.000544)
80. feature City_Berkeley (0.000532)
81. feature City_Kota Kinabalu (0.000495)
82. feature Country_Malaysia (0.000475)
83. feature Cooling startegy_building level_Mechanically Ventilated (0.000475)
84. feature Country_Singapore (0.000435)
85. feature City_Kalgoorlie (0.000415)
86. feature City_Wollongong (0.000395)
87. feature City_Hampshire (0.000316)
88. feature Country_Italy (0.000246)
89. feature City_Malmo (0.000238)
90. feature City_Lisbon (0.000224)
91. feature City_Goulburn (0.000199)
92. feature City_Halmstad (0.000195)
93. feature City_Auburn (0.000193)
94. feature City_Varese (0.000163)
95. feature City_Putra Jaya (0.000133)
96. feature City_Bedong (0.000128)
97. feature City_Lodi (0.000107)
98. feature City_Imola (0.000104)
99. feature City_Beverly Hills (0.000054)
100. feature City_Kinarut (0.000039)
101. feature City_Kuching (0.000038)
102. feature City_Kuala Lumpur (0.000015)
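
Note that one-hot encoding splits each categorical feature's importance across its dummy columns, so City alone is scattered over dozens of tiny entries above. To compare the original 13 features on an equal footing, the dummy importances can be summed back to their source columns. A sketch, relying on the Column_value naming that pd.get_dummies produces:

In [ ]:
# Map each dummy column back to its original feature and sum the importances.
imp = pd.Series(importances, index=features_withdummies.columns)

def source_column(col):
    for original in feature_columns:
        if col == original or col.startswith(original + "_"):
            return original
    return col

print(imp.groupby(source_column).sum().sort_values(ascending=False))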

Plot Importances¶

In [264]:
# Plot the feature importances of the forest
plt.figure(figsize=(15,6))
plt.title("Feature Importances")
plt.barh(range(15), importances[indices][:15], align="center")
plt.yticks(range(15), features_withdummies.columns[indices][:15])
plt.gca().invert_yaxis()
plt.tight_layout(pad=0.4)
plt.show()
[Bar chart: the 15 most important features, led by air temperature, relative humidity, air velocity, and clothing level]
In [270]:
importances[indices][:15]
Out[270]:
array([0.22724414, 0.19250949, 0.1535699 , 0.14844308, 0.06935753,
       0.01862525, 0.01652153, 0.01608305, 0.01129537, 0.00970149,
       0.00798764, 0.00701028, 0.00672132, 0.00630335, 0.00621502])

Confusion Matrix Visualization¶

In [271]:
sns.set(font_scale=1.4)
cm = confusion_matrix(y_true, y_pred)
np.set_printoptions(precision=2)
print('Confusion matrix, without normalization')
print(cm)
Confusion matrix, without normalization
[[   4   10   33   73   10    1    4]
 [   1   74  229  427   48    8    4]
 [   3   62  662 1363  135   23    4]
 [   0   46  570 4617  499  111   20]
 [   0   14  183 1438  683  163   18]
 [   0    9   70  453  249  259   39]
 [   0    5   32  123   71  105   80]]
In [279]:
def plot_confusion_matrix(cm, categories, title='Confusion matrix', cmap='Reds'):
    """Render a confusion matrix as a heatmap with labelled class ticks."""
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(categories))
    plt.xticks(tick_marks, categories, rotation=90)
    plt.yticks(tick_marks, categories)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
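
For reference, recent scikit-learn versions ship an equivalent built-in helper, so the hand-rolled function above is optional. A sketch using it (assuming scikit-learn >= 1.0):

In [ ]:
from sklearn.metrics import ConfusionMatrixDisplay

# Same heatmap via scikit-learn's display object.
ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=categories).plot(cmap='Reds')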
In [280]:
categories = np.array(target.sort_values().unique())
plot_confusion_matrix(cm, categories)
[Heatmap: confusion matrix of raw counts]
In [276]:
# Normalize each row by its total so entries become per-class fractions
cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print('Normalized confusion matrix')
print(cm_normalized)
plt.figure(figsize=(12,10))
plot_confusion_matrix(cm_normalized, categories, title='Normalized Classification Error Matrix')
plt.show()
Normalized confusion matrix
[[0.03 0.07 0.24 0.54 0.07 0.01 0.03]
 [0.   0.09 0.29 0.54 0.06 0.01 0.01]
 [0.   0.03 0.29 0.61 0.06 0.01 0.  ]
 [0.   0.01 0.1  0.79 0.09 0.02 0.  ]
 [0.   0.01 0.07 0.58 0.27 0.07 0.01]
 [0.   0.01 0.06 0.42 0.23 0.24 0.04]
 [0.   0.01 0.08 0.3  0.17 0.25 0.19]]
[Heatmap: row-normalized confusion matrix]
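
After row normalization, each diagonal entry is that class's recall (e.g. 0.79 for the neutral class, matching the classification report). A quick check (a sketch):

In [ ]:
# The diagonal of the row-normalized matrix equals per-class recall.
for label, recall in zip(categories, np.diag(cm_normalized)):
    print(f"{label:>5}: recall = {recall:.2f}")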