### Load and parse a JSON data file and determine some information about it.
### Sample of the records in the data file (a list of dicts):
### [{'date': '2022-05-24',
###   'tmin': 61,
###   'tmax': 80,
###   'prcp': 0.0,
###   'snow': 0.0,
###   'snwd': 0.0,
###   'awnd': 8.3},
###  {'date': '2022-05-31',
###   'tmin': 68,
###   'tmax': 92,
###   'prcp': 0.0,
###   'snow': 0.0,
###   'snwd': 0.0,
###   'awnd': 4.0}]
import json
import os
import pprint
file_path = os.path.join('2025-03-10', 'rdu-weather-history.json')

### Read the sample weather data file and let the json module parse it.
with open(file_path) as weather_file:
    weather_data = json.load(weather_file)
# print(len(weather_data))
# first item in the data
# pprint.pp(weather_data[0])

### How many days of data do we have for each year?
# The year is the first four characters of each record's 'date' string.
years = {}
for d in weather_data:
    key = d['date'][:4]
    years[key] = years.get(key, 0) + 1
# pprint.pp(years, width=5)
# pprint.pp(years)
### What was the warmest day in the data set?
# max() selects the record with the highest 'tmax' (the first one on ties),
# so the result no longer depends on a seed value of 0 degrees.
hottest = max(weather_data, key=lambda x: x['tmax'])
warm_day = {'date': hottest['date'], 'tmax': hottest['tmax']}
print(f"The warmest day was {warm_day['date']} at {warm_day['tmax']} degrees.")

### What was the coldest day in the data set?
# The coldest day is chosen by its 'tmin', so 'tmin' is also the value
# reported (reporting 'tmax' here would show that day's high instead).
cold_day = min(weather_data, key=lambda x: x['tmin'])
print(f"The coldest day was {cold_day['date']} at {cold_day['tmin']} degrees.")
# output
# The warmest day was 2017-07-23 at 102 degrees.
# The coldest day was 2018-01-07 at <tmin of that day> degrees.
### How many days had snowfall?
# First as a one-entry dict holding the count of records with snow > 0.0 ...
snow_days = {
    'Count of snow day': sum(1 for data in weather_data if data['snow'] > 0.0)
}
print(snow_days)
# output
# {'Count of snow day': 15}

# ... then as the list of dates on which snow fell.
snow_days = [data['date'] for data in weather_data if data['snow'] > 0.0]
print(f"Snow fell on {len(snow_days)} days.")
# output
# Snow fell on 15 days.
import os
import json
import pprint
### It's a very common scenario to only want to work
### on a subset of a larger dataset.
### In other words, you want to filter out the values that
### you're not interested in to be able to focus on
### the values that you are interested in.
### {"date": "2017-01-03", "tmin": 47, "tmax": 56, "prcp": 0.31, "snow": 0.0, "snwd": 0.0, "awnd": 5.37},
### {"date": "2017-01-07", "tmin": 20, "tmax": 32, "prcp": 0.6, "snow": 0.5, "snwd": 0.0, "awnd": 9.62}
file_path = os.path.join("2025-03-10", "rdu-weather-history.json")
# print(os.path.isfile(file_path))
# print(os.getcwd())

# Parse the whole JSON file into weather_data.
with open(file_path, mode="r") as file:
    weather_data = json.load(file)
### the filter() function gives us a way to remove unwanted data points
# snow_days = list(filter(lambda x: x['snow'] > 0.0, weather_data))
# print(len(weather_data))
# print(len(snow_days))
### filter can also be used on non-numerical data, like strings
### create a subset that contains summer days with heavy rain (1 inch or more of precipitation)
def is_summer_rain_day(d):
    """Return True when record *d* is a rainy summer day.

    "Summer" here means the 'date' string contains "-07-" or "-08-";
    "rainy" means 'prcp' is at least 1.0. 'prcp' is only read for
    summer dates.
    """
    if "-07-" not in d['date'] and "-08-" not in d['date']:
        return False
    return bool(d['prcp'] >= 1.0)
# Keep only the records the predicate accepts, then show how many there
# are and what they look like.
summer_raindays = [day for day in weather_data if is_summer_rain_day(day)]
print(len(summer_raindays))
pprint.pp(summer_raindays)
###output
# 15
# [{'date': '2017-08-08',
# 'tmin': 68,
# 'tmax': 78,
# 'prcp': 1.5,
# 'snow': 0.0,
# 'snwd': 0.0,
# 'awnd': 5.82},
# {'date': '2018-08-03',
# 'tmin': 70,
# 'tmax': 77,
# 'prcp': 1.12,
# 'snow': 0.0,
# 'snwd': 0.0,
# 'awnd': 5.82},
##################################################
### So now we've seen two ways of filtering data.
### One using a list comprehension and
### one using the filter function.
### So I'd like to use list comprehensions
### when the filtering logic is simple
### and has maybe one or at the most two conditions.
### And I'll prefer using the filter function
### when the filtering logic gets more complex
### than can just fit into one line of code.
##################################################
import os
import json
file_path = os.path.join("C:/Users/.../MyPrj/2024-11-07", "history_weather.json")
# print(os.path.exists(file_path))
with open(file_path) as f:
    weather_data = json.load(f)

summer_months = ["-07-", "-08-"]
summer_rainydays = []
for data in weather_data:
    ### any() iterates over summer_months, not over data: the record is a
    ### candidate as soon as one of the month markers occurs in its date.
    if not any(m in data['date'] for m in summer_months):
        continue
    if data['prcp'] >= 1.0:
        summer_rainydays.append(data)
print(len(summer_rainydays))
# print(summer_rainydays)
# Even when filter() is given a list of records, it passes the records to the function one at a time.
def is_summer_rainyday(d):
    """Tell whether record *d* is a July/August day with 'prcp' >= 1.0.

    The month is detected by looking for "-07-" or "-08-" inside the
    'date' string; 'prcp' is only consulted when the month matches.
    """
    in_summer = False
    for month_tag in ("-07-", "-08-"):
        if month_tag in d['date']:
            in_summer = True
            break
    return bool(in_summer and d['prcp'] >= 1.0)
# Same selection as the loop version, this time driven by the predicate.
summer_rainydays = [day for day in weather_data if is_summer_rainyday(day)]
print(len(summer_rainydays))
###output
# 15
# 15
### Get cold windy rainy days
def get_cold_windy_rainy_days(file_path=None):
    """Return the weather records for cold, windy days with precipitation.

    A record qualifies when all three conditions hold:
      * the mean of 'tmax' and 'tmin' is below 45,
      * 'prcp' + 'snow' is greater than 0.7,
      * 'awnd' is at least 10.0.

    Args:
        file_path: Optional path to the JSON weather file. When omitted,
            the original hard-coded location is used.

    Returns:
        A list of the matching record dicts, in file order.
    """
    import os
    import json

    if file_path is None:
        file_path = os.path.join("C:/Users/.../MyPrj/2024-11-07", "history_weather.json")
    with open(file_path, 'r') as f:
        weather_data = json.load(f)

    def is_cold_windy_rainy_day(d):
        # Parentheses matter: without them only 'tmin' is halved and the
        # "average" comes out far too high.
        avg_temp = (d['tmax'] + d['tmin']) / 2
        total_prcp = d['prcp'] + d['snow']
        return avg_temp < 45 and total_prcp > 0.7 and d['awnd'] >= 10.0

    return list(filter(is_cold_windy_rainy_day, weather_data))
# Run the filter against the default data file and print the matching records.
print(get_cold_windy_rainy_days())
###output
###[{'date': '2022-01-21', 'tmin': 22, 'tmax': 30, 'prcp': 0.15, 'snow': 1.5, 'snwd': 2.0, 'awnd': 10.7}]
### It's probably not a surprise to you
### that sorting is one of the most common data operations.
import os
import json
# import pprint
file_path = os.path.join("C:/Users/SKTelecom/UserApps/PyScripter/MyPrj/2024-11-07", "history_weather.json")
# print(os.path.exists(file_path))

# Load every record from the JSON file.
with open(file_path) as f:
    weather_data = json.load(f)
### create a subset of the data for days that had snowfall
snow_days = [data for data in weather_data if data['snow'] > 0]
print(len(snow_days))

### Sort by snowfall, heaviest first, and print the amounts on one line.
sorted_snow_days = sorted(snow_days, key=lambda d: d['snow'], reverse=True)
# join() places the separator between items only, so the last element does
# not have to be located. Looking it up with list.index() inside the loop
# is quadratic and returns the first *equal* record, which misplaces the
# line ending when two records are identical.
if sorted_snow_days:
    print(' / '.join(str(s['snow']) for s in sorted_snow_days))

### Sort on multiple fields: first by snowfall, then by average wind speed.
sorted_dataset = sorted(snow_days, key=lambda d: (d['snow'], d['awnd']))
print(len(sorted_dataset))
for i, d in enumerate(sorted_dataset, start=1):
    print(f"[{i:02d}] snow: {d['snow']} awnd: {d['awnd']}")
###output
# 15
# 7.0 / 5.9 / 2.5 / 1.9 / 1.6 / 1.5 / 1.4 / 0.9 / 0.79 / 0.5 / 0.39 / 0.3 / 0.3 / 0.2 / 0.2
# 15
# [01] snow: 0.2 awnd: 3.58
# [02] snow: 0.2 awnd: 5.59
# [03] snow: 0.3 awnd: 4.92
# [04] snow: 0.3 awnd: 5.82
# [05] snow: 0.39 awnd: 8.7
# [06] snow: 0.5 awnd: 9.62
# [07] snow: 0.79 awnd: 3.8
# [08] snow: 0.9 awnd: 4.25
# [09] snow: 1.4 awnd: 7.16
# [10] snow: 1.5 awnd: 10.7
# [11] snow: 1.6 awnd: 8.72
# [12] snow: 1.9 awnd: 7.61
# [13] snow: 2.5 awnd: 6.49
# [14] snow: 5.9 awnd: 5.82
# [15] snow: 7.0 awnd: 15.21
### Using built-in map function to transform from the original data to wanted format.
import os
import copy
import json
import pprint
file_path = os.path.join("C:/Users/.../2024-11-07", "history_weather.json")

# Parse the JSON weather file into a list of records.
with open(file_path) as f:
    weather_data = json.load(f)
### Convert the weather data from imperial to metric units.
def ToC(f):
    """Convert a Fahrenheit temperature to Celsius.

    A missing reading (None) is treated as 0 before converting.
    """
    if f is None:
        f = 0
    offset = f - 32
    return offset * 5 / 9
def ToMM(i):
    """Convert a length in inches to millimetres (None counts as 0)."""
    mm_per_inch = 25.4
    if i is None:
        i = 0
    return i * mm_per_inch
def ToKPH(s):
    """Convert a speed in miles per hour to kilometres per hour.

    None is treated as a speed of 0.
    """
    kph_per_mph = 1.60934
    speed = s if s is not None else 0
    return speed * kph_per_mph
def ToMetric(wd):
    """Return a shallow copy of record *wd* with its readings in metric units.

    Temperatures ('tmin', 'tmax') go through ToC, depths ('prcp', 'snow',
    'snwd') through ToMM and wind speed ('awnd') through ToKPH. The input
    record itself is not modified; other keys are carried over unchanged.
    """
    conversions = (
        ('tmin', ToC),
        ('tmax', ToC),
        ('prcp', ToMM),
        ('snow', ToMM),
        ('snwd', ToMM),
        ('awnd', ToKPH),
    )
    new_wd = copy.copy(wd)
    for field, convert in conversions:
        new_wd[field] = convert(wd[field])
    return new_wd
# Build the metric version of every record and compare the first record
# before and after conversion.
metric_weather = [ToMetric(record) for record in weather_data]
pprint.pp(weather_data[0])
pprint.pp(metric_weather[0])

# convert objects to tuple: (date, mean of the day's high and low)
avg_temp = lambda t1, t2: (t1+t2)/2.0
tuple_data = [(d['date'], avg_temp(d['tmax'], d['tmin'])) for d in weather_data]
print(tuple_data[:2])
###output
# {'date': '2017-01-03',
# 'tmin': 47,
# 'tmax': 56,
# 'prcp': 0.31,
# 'snow': 0.0,
# 'snwd': 0.0,
# 'awnd': 5.37}
# {'date': '2017-01-03',
# 'tmin': 8.333333333333334,
# 'tmax': 13.333333333333334,
# 'prcp': 7.874,
# 'snow': 0.0,
# 'snwd': 0.0,
# 'awnd': 8.6421558}
# [('2017-01-03', 51.5), ('2017-01-07', 26.0)]
import json
import pprint
def get_day_temp_description(file_path=None):
    """Label every day in the weather file as "cold", "warm" or "hot".

    The label is based on the mean of the record's 'tmin' and 'tmax':
    60 or below is "cold", above 60 and below 80 is "warm", and
    anything else is "hot".

    Args:
        file_path: Optional path to the JSON weather file. When omitted,
            the original hard-coded location is used.

    Returns:
        A list of (date, description) tuples, one per record, in file order.
    """
    # Imported locally so the function does not depend on imports made
    # elsewhere in the file.
    import os
    import json

    if file_path is None:
        file_path = os.path.join("C:/Users/.../2024-11-07", "history_weather.json")
    with open(file_path, 'r') as f:
        weather_data = json.load(f)

    def average_temp_to_desc(d):
        avg_temp = (d['tmin'] + d['tmax']) / 2
        if avg_temp <= 60:
            desc = "cold"
        elif avg_temp < 80:  # the > 60 check is implied by the branch above
            desc = "warm"
        else:
            desc = "hot"
        return (d['date'], desc)

    return [average_temp_to_desc(d) for d in weather_data]
# Label every day, then show the first five (date, description) pairs.
desc = get_day_temp_description()
pprint.pp(desc[:5])
###output
# [('2017-01-03', 'cold'),
# ('2017-01-07', 'cold'),
# ('2017-01-10', 'cold'),
# ('2017-01-13', 'warm'),
# ('2017-01-15', 'cold')]
### Comments section (appears to be leftover text from the web page this code was copied from)