상세 컨텐츠

본문 제목

Parsing and Exploring Data

Python

by techbard 2025. 4. 10. 18:23

본문

반응형

 

### Load and parse a JSON data file and determine some information about it.

### 'date': '2022-05-24',
### 'tmin': 61,
### 'tmax': 80,
### 'prcp': 0.0,
### 'snow': 0.0,
### 'snwd': 0.0,
### 'awnd': 8.3},

### 'date': '2022-05-31',
### 'tmin': 68,
### 'tmax': 92,
### 'prcp': 0.0,
### 'snow': 0.0,
### 'snwd': 0.0,
### 'awnd': 4.0

import json
import os
import pprint

file_path = os.path.join('2025-03-10', 'rdu-weather-history.json')

### Read the sample weather file; json.load parses it into Python objects.
with open(file_path, "r") as source:
    weather_data = json.load(source)

# Quick inspection helpers:
# print(len(weather_data))
# pprint.pp(weather_data[0])

### How many days of data do we have for each year?
### The year is the first four characters of each record's 'date'.
years = {}
for record in weather_data:
    year = record['date'][:4]
    years[year] = years.get(year, 0) + 1

# pprint.pp(years, width=5)
# pprint.pp(years)

### What was the warmest day in the data set?
# max() returns the first record with the highest 'tmax'. Unlike a manual scan
# seeded with tmax=0, it still finds a day when no reading is above zero.
warmest = max(weather_data, key=lambda x: x['tmax'])
warm_day = {'date': warmest['date'], 'tmax': warmest['tmax']}

print(f"The warmest day was {warm_day['date']} at {warm_day['tmax']} degrees.")

### What was the coldest day in the data set?
cold_day = min(weather_data, key=lambda x: x['tmin'])
# Report the day's low ('tmin'), i.e. the value the day was selected by.
print(f"The coldest day was {cold_day['date']} at {cold_day['tmin']} degrees.")

# output
# The warmest day was 2017-07-23 at 102 degrees.
# The coldest day was 2018-01-07 at <that day's tmin> degrees.
#   (an earlier run printed 30 here, which was that day's 'tmax')

### How many days had snowfall?
# Version 1: keep the tally in a one-entry dict.
snow_days = {
    'Count of snow day': sum(1 for entry in weather_data if entry['snow'] > 0.0)
}

print(snow_days)

# output
# {'Count of snow day': 15}

# Version 2: collect the dates of the snowy days and count them.
snow_days = []
for entry in weather_data:
    if entry['snow'] > 0.0:
        snow_days.append(entry['date'])
print(f"Snow fell on {len(snow_days)} days.")

# output
# Snow fell on 15 days.

 

import os
import json
import pprint

### It's a very common scenario to only want to work
### on a subset of a larger dataset.
### In other words, you want to filter out the values that
### you're not interested in to be able to focus on
### the values that you are interested in.

### {"date": "2017-01-03", "tmin": 47, "tmax": 56, "prcp": 0.31, "snow": 0.0, "snwd": 0.0, "awnd": 5.37},
### {"date": "2017-01-07", "tmin": 20, "tmax": 32, "prcp": 0.6, "snow": 0.5, "snwd": 0.0, "awnd": 9.62}

file_path = os.path.join("2025-03-10", "rdu-weather-history.json")
# Handy checks when the file cannot be found:
# print(os.path.isfile(file_path))
# print(os.getcwd())

with open(file_path, "r") as handle:
    weather_data = json.load(handle)

### filter() keeps only the items for which the predicate returns a truthy value
# snow_days = list(filter(lambda x: x['snow'] > 0.0, weather_data))
# print(len(weather_data))
# print(len(snow_days))

### filter can also be used on non-numerical data, like strings
### create a subset that contains summer days with heavy rain (at least 1 in.)
def is_summer_rain_day(d):
    """Return True for a July/August record whose 'prcp' is at least 1.0.

    The month test is a substring match of "-07-" / "-08-" against d['date'];
    'prcp' is only looked at when the month matches.
    """
    day_text = d['date']
    in_summer = "-07-" in day_text or "-08-" in day_text
    return in_summer and d['prcp'] >= 1.0

# Keep only the records accepted by is_summer_rain_day, then show them.
summer_raindays = [day for day in weather_data if is_summer_rain_day(day)]
print(len(summer_raindays))
pprint.pp(summer_raindays)

###output
# 15
# [{'date': '2017-08-08',
#   'tmin': 68,
#   'tmax': 78,
#   'prcp': 1.5,
#   'snow': 0.0,
#   'snwd': 0.0,
#   'awnd': 5.82},
#  {'date': '2018-08-03',
#   'tmin': 70,
#   'tmax': 77,
#   'prcp': 1.12,
#   'snow': 0.0,
#   'snwd': 0.0,
#   'awnd': 5.82},


##################################################
### So now we've seen two ways of filtering data.
### One using a list comprehension and
### one using the filter function.
### So I'd like to use list comprehensions
### when the filtering logic is simple
### and has maybe one or at the most two conditions.
### And I'll prefer using the filter function
### when the filtering logic gets more complex
### than can just fit into one line of code.
##################################################

 

import os
import json

file_path = os.path.join("C:/Users/.../MyPrj/2024-11-07", "history_weather.json")
# print(os.path.exists(file_path))

with open(file_path, 'r') as handle:
    weather_data = json.load(handle)

summer_months = ["-07-", "-08-"]

### any() iterates over summer_months, not over the record: a record counts as
### a summer day as soon as one of the month markers occurs in its date.
summer_rainydays = [
    data for data in weather_data
    if any(m in data['date'] for m in summer_months) and data['prcp'] >= 1.0
]
print(len(summer_rainydays))
# print(summer_rainydays)

# filter() hands the iterable's elements to the predicate one at a time, even
# when the iterable is nested (here: a list of record dicts).
def is_summer_rainyday(d):
    """Tell whether record d is a July/August day with 'prcp' of 1.0 or more."""
    for marker in ("-07-", "-08-"):
        if marker in d['date']:
            # The month matched, so the rainfall threshold decides the answer.
            return d['prcp'] >= 1.0
    return False

# Same selection as the loop version, expressed through the predicate.
summer_rainydays = [record for record in weather_data if is_summer_rainyday(record)]
print(len(summer_rainydays))

###output
# 15
# 15

 

### Get cold windy rainy days
def get_cold_windy_rainy_days(file_path=None):
    """Return the records of cold, wet and windy days from the weather file.

    A day qualifies when its mean temperature ((tmax + tmin) / 2) is below 45,
    its combined 'prcp' + 'snow' exceeds 0.7, and its 'awnd' is at least 10.0.

    Args:
        file_path: JSON file to read. When omitted, the history_weather.json
            path used by this snippet is loaded.

    Returns:
        A list of the matching record dicts, in file order.
    """
    import os
    import json

    if file_path is None:
        file_path = os.path.join("C:/Users/.../MyPrj/2024-11-07", "history_weather.json")
    with open(file_path, 'r', encoding='utf-8') as f:
        weather_data = json.load(f)

    def is_cold_windy_rainy_day(d):
        # Parenthesise the sum: "tmax + tmin / 2" would halve only tmin.
        avg_temp = (d['tmax'] + d['tmin']) / 2
        total_prcp = d['prcp'] + d['snow']
        return avg_temp < 45 and total_prcp > 0.7 and d['awnd'] >= 10.0

    blustery_days = list(filter(is_cold_windy_rainy_day, weather_data))
    return blustery_days

# Show every record returned by get_cold_windy_rainy_days().
blustery_days = get_cold_windy_rainy_days()
print(blustery_days)
###output
###[{'date': '2022-01-21', 'tmin': 22, 'tmax': 30, 'prcp': 0.15, 'snow': 1.5, 'snwd': 2.0, 'awnd': 10.7}]

 

### It's probably not a surprise to you
### that sorting is one of the most common data operations.

import os
import json
# import pprint

file_path = os.path.join("C:/Users/SKTelecom/UserApps/PyScripter/MyPrj/2024-11-07", "history_weather.json")
# print(os.path.exists(file_path))

with open(file_path, 'r') as f:
    weather_data = json.load(f)

### create a subset of the data for days that had snowfall
snow_days = [data for data in weather_data if data['snow'] > 0]
print(len(snow_days))
sorted_snow_days = sorted(snow_days, key=lambda d: d['snow'], reverse=True)
# Print the amounts on one line separated by " / ". str.join places the
# separator only between items, so the last element needs no special case.
# Looking it up with list.index() costs a linear search per item and returns
# the first of two equal records rather than the current one.
if sorted_snow_days:
    print(' / '.join(str(s['snow']) for s in sorted_snow_days))

### Sort on multiple fields: snowfall first, average wind speed as tie-breaker.
# A tuple key compares element by element, which yields the two-level order.
sorted_dataset = list(snow_days)
sorted_dataset.sort(key=lambda day: (day['snow'], day['awnd']))
print(len(sorted_dataset))
for i, d in enumerate(sorted_dataset):
    print(f"[{i+1:02d}] snow: {d['snow']} awnd: {d['awnd']}")

###output
# 15
# 7.0 / 5.9 / 2.5 / 1.9 / 1.6 / 1.5 / 1.4 / 0.9 / 0.79 / 0.5 / 0.39 / 0.3 / 0.3 / 0.2 / 0.2
# 15
# [01] snow: 0.2 awnd: 3.58
# [02] snow: 0.2 awnd: 5.59
# [03] snow: 0.3 awnd: 4.92
# [04] snow: 0.3 awnd: 5.82
# [05] snow: 0.39 awnd: 8.7
# [06] snow: 0.5 awnd: 9.62
# [07] snow: 0.79 awnd: 3.8
# [08] snow: 0.9 awnd: 4.25
# [09] snow: 1.4 awnd: 7.16
# [10] snow: 1.5 awnd: 10.7
# [11] snow: 1.6 awnd: 8.72
# [12] snow: 1.9 awnd: 7.61
# [13] snow: 2.5 awnd: 6.49
# [14] snow: 5.9 awnd: 5.82
# [15] snow: 7.0 awnd: 15.21

 

### Using built-in map function to transform from the original data to wanted format.

import os
import copy
import json
import pprint

file_path = os.path.join("C:/Users/.../2024-11-07", "history_weather.json")

# Parse the whole JSON document into Python objects.
with open(file_path, 'r') as handle:
    weather_data = json.load(handle)

### Convert the weather data from imperial to metric units.
def ToC(f):
    """Convert a Fahrenheit reading to Celsius; a None reading is treated as 0."""
    fahrenheit = f if f is not None else 0
    return (fahrenheit - 32) * 5/9

def ToMM(i):
    """Convert inches to millimetres (x 25.4); a None value is treated as 0."""
    if i is None:
        i = 0
    return i*25.4

def ToKPH(s):
    """Convert miles per hour to km/h (x 1.60934); a None value is treated as 0."""
    mph = 0 if s is None else s
    return mph * 1.60934

def ToMetric(wd):
    """Return a shallow copy of record wd with its measurements in metric units.

    'tmin'/'tmax' go through ToC, 'prcp'/'snow'/'snwd' through ToMM and
    'awnd' through ToKPH; every other key is copied unchanged. The input
    record itself is not modified.
    """
    converters = (
        ('tmin', ToC),
        ('tmax', ToC),
        ('prcp', ToMM),
        ('snow', ToMM),
        ('snwd', ToMM),
        ('awnd', ToKPH),
    )
    new_wd = copy.copy(wd)
    for field, convert in converters:
        new_wd[field] = convert(wd[field])
    return new_wd

# Apply ToMetric to every record; the originals stay untouched for comparison.
metric_weather = [ToMetric(day) for day in weather_data]
pprint.pp(weather_data[0])
pprint.pp(metric_weather[0])

# Turn each record into a (date, mean temperature) tuple.
avg_temp = lambda t1, t2: (t1+t2)/2.0
tuple_data = [(d['date'], avg_temp(d['tmax'], d['tmin'])) for d in weather_data]
print(tuple_data[0:2])

###output
# {'date': '2017-01-03',
 # 'tmin': 47,
 # 'tmax': 56,
 # 'prcp': 0.31,
 # 'snow': 0.0,
 # 'snwd': 0.0,
 # 'awnd': 5.37}
# {'date': '2017-01-03',
 # 'tmin': 8.333333333333334,
 # 'tmax': 13.333333333333334,
 # 'prcp': 7.874,
 # 'snow': 0.0,
 # 'snwd': 0.0,
 # 'awnd': 8.6421558}
# [('2017-01-03', 51.5), ('2017-01-07', 26.0)]

 

import json
import pprint

def get_day_temp_description(file_path=None):
    """Label every day in the weather file as "cold", "warm" or "hot".

    The label is based on the mean of 'tmin' and 'tmax': up to and including
    60 is "cold", above 60 and below 80 is "warm", 80 and above is "hot".

    Args:
        file_path: JSON file to read. When omitted, the history_weather.json
            path used by this snippet is loaded.

    Returns:
        A list of (date, label) tuples, one per record, in file order.
    """
    # This snippet's import list only has json and pprint, so bring os in here.
    import os

    if file_path is None:
        file_path = os.path.join("C:/Users/.../2024-11-07", "history_weather.json")
    with open(file_path, 'r', encoding='utf-8') as f:
        weather_data = json.load(f)

    def average_temp_to_desc(d):
        avg_temp = (d['tmin'] + d['tmax'])/2
        if avg_temp <= 60:
            desc = "cold"
        elif avg_temp < 80:  # "> 60" is already implied by the branch above
            desc = "warm"
        else:
            desc = "hot"
        return (d['date'], desc)

    new_data = list(map(average_temp_to_desc, weather_data))
    return new_data

# Label every day, then preview the first five (date, label) pairs.
desc = get_day_temp_description()
preview = desc[:5]
pprint.pp(preview)

###output
# [('2017-01-03', 'cold'),
 # ('2017-01-07', 'cold'),
 # ('2017-01-10', 'cold'),
 # ('2017-01-13', 'warm'),
 # ('2017-01-15', 'cold')]

 

### 'date': '2022-05-24',
### 'tmin': 61,
### 'tmax': 80,
### 'prcp': 0.0,
### 'snow': 0.0,
### 'snwd': 0.0,
### 'awnd': 8.3},

### 'date': '2022-05-31',
### 'tmin': 68,
### 'tmax': 92,
### 'prcp': 0.0,
### 'snow': 0.0,
### 'snwd': 0.0,
### 'awnd': 4.0

import os
import json
import pprint

def get_summer_rainy_days(file_path=None):
    """Return the July/August records with 'prcp' of at least 1.0.

    Args:
        file_path: JSON file to read. When omitted, the history_weather.json
            path used by this snippet is loaded.

    Returns:
        A list of the matching record dicts, in file order.
    """
    if file_path is None:
        file_path = os.path.join("C:/Users/.../2024-11-07", "history_weather.json")
    with open(file_path, 'r', encoding='utf-8') as f:
        weather_data = json.load(f)

    def is_summer_rainy_day(d):
        # Month markers are matched as substrings of the 'date' string.
        summer_months = ("-07-", "-08-")
        return any(m in d['date'] for m in summer_months) and d['prcp'] >= 1.0

    return list(filter(is_summer_rainy_day, weather_data))

# Fetch the rainy summer days and preview the first two records.
result = get_summer_rainy_days()
first_two = result[:2]
pprint.pp(first_two)
        
###output
# [{'date': '2017-08-08',
  # 'tmin': 68,
  # 'tmax': 78,
  # 'prcp': 1.5,
  # 'snow': 0.0,
  # 'snwd': 0.0,
  # 'awnd': 5.82},
 # {'date': '2018-08-03',
  # 'tmin': 70,
  # 'tmax': 77,
  # 'prcp': 1.12,
  # 'snow': 0.0,
  # 'snwd': 0.0,
  # 'awnd': 5.82}]

 

# {'date': '2017-01-03',
 # 'tmin': 47,
 # 'tmax': 56,
 # 'prcp': 0.31,
 # 'snow': 0.0,
 # 'snwd': 0.0,
 # 'awnd': 5.37}

import os
import json
import pprint
from collections import defaultdict

### The default values of my dictionary will be integers
### if they don't already exist as a key.

file_path = os.path.join("C:/Users/SKTelecom/UserApps/PyScripter/MyPrj/2024-11-07", "history_weather.json")

with open(file_path, 'r') as handle:
    weather_data = json.load(handle)

### Count the number of data points for each year we have data
### (defaultdict(int) starts every missing key at 0)
# years = defaultdict(int)
# for d in weather_data:
    # key = d['date'][0:4]
    # years[key] += 1

# pprint.pp(years)

### defaultdict can use more complex objects, like lists:
### group the records by their "YYYY-MM" prefix.
years_months = defaultdict(list)
for record in weather_data:
    years_months[record['date'][:7]].append(record)

pprint.pp(len(years_months))

### What were the coldest and warmest day of each month?
def warmest_day(month):
    """Return (date, tmax) of the record with the highest 'tmax' in month."""
    def high_temp(record):
        return record['tmax']

    hottest = max(month, key=high_temp)
    return hottest['date'], hottest['tmax']

def coldest_day(month):
    """Return (date, tmin) of the record with the lowest 'tmin' in month."""
    def low_temp(record):
        return record['tmin']

    chilliest = min(month, key=low_temp)
    return chilliest['date'], chilliest['tmin']

### Walk the month groups and report each one's warmest and coldest day,
### stopping once display_count months have been printed.
display_count = 3
idx = 1
for year_month, daylist in years_months.items():
    print(f"Warmest day in {year_month}: {warmest_day(daylist)}")
    print(f"Coldest day in {year_month}: {coldest_day(daylist)}")
    if idx >= display_count:
        break
    idx += 1

###output
# 65
# Warmest day in 2017-01: ('2017-01-13', 76)
# Coldest day in 2017-01: ('2017-01-09', 9)
# Warmest day in 2017-02: ('2017-02-12', 83)
# Coldest day in 2017-02: ('2017-02-04', 26)
# Warmest day in 2017-03: ('2017-03-21', 82)
# Coldest day in 2017-03: ('2017-03-17', 23)

 

### Sometimes when you're working with a large dataset
### you want to be able to reduce that dataset down to a single value.

# {'date': '2017-01-03',
 # 'tmin': 47,
 # 'tmax': 56,
 # 'prcp': 0.31,
 # 'snow': 0.0,
 # 'snwd': 0.0,
 # 'awnd': 5.37}

import os
import json
import pprint
from functools import reduce
from collections import defaultdict

file_path = os.path.join("C:/Users/.../2024-11-07", "history_weather.json")

with open(file_path, 'r') as handle:
    weather_data = json.load(handle)

### how much snowfall is in the entire dataset?

### sum version: add up the 'snow' field of every record
total_snowfall = sum(day['snow'] for day in weather_data)
# print(total_snowfall)

### reduce version: fold the records into a running total that starts at 0
summing_snowfall = reduce(lambda running, day: running + day['snow'], weather_data, 0)
# print(total_snowfall)

### how much total precipitation is in the entire dataset?
### (each day contributes its 'snow' plus its 'prcp')
total_prcp = reduce(
    lambda running, day: running + (day['snow'] + day['prcp']),
    weather_data,
    0,
)
# print(total_prcp)

### What was the warmest day in which it snowed? For every year, find the
### record with the highest 'tmax' among the days where 'snow' > 0.
snowed_days = defaultdict(list)

for d in weather_data:
    year = d['date'][:4]
    if not d['snow'] > 0.0:
        continue
    snowed_days[year].append(d)

# One winner per year: the snowy day with the highest 'tmax'.
snowed_tmax_days = {
    year: max(days, key=lambda x: x['tmax'])
    for year, days in snowed_days.items()
}

sorting_result = sorted(snowed_tmax_days.items())
pprint.pp(sorting_result)

###output
# [('2017',
  # {'date': '2017-03-12',
   # 'tmin': 30,
   # 'tmax': 48,
   # 'prcp': 0.03,
   # 'snow': 0.3,
   # 'snwd': 0.0,
   # 'awnd': 4.92}),
 # ('2018',
  # {'date': '2018-03-24',
   # 'tmin': 27,
   # 'tmax': 50,
   # 'prcp': 0.91,
   # 'snow': 0.2,
   # 'snwd': 0.0,
   # 'awnd': 3.58}),
 # ('2020',
  # {'date': '2020-02-20',
   # 'tmin': 33,
   # 'tmax': 43,
   # 'prcp': 0.47,
   # 'snow': 2.5,
   # 'snwd': 0.0,
   # 'awnd': 6.49}),
 # ('2021',
  # {'date': '2021-01-28',
   # 'tmin': 29,
   # 'tmax': 40,
   # 'prcp': 0.36,
   # 'snow': 1.6,
   # 'snwd': 2.0,
   # 'awnd': 8.72}),
 # ('2022',
  # {'date': '2022-01-29',
   # 'tmin': 25,
   # 'tmax': 36,
   # 'prcp': 0.05,
   # 'snow': 0.39,
   # 'snwd': 0.0,
   # 'awnd': 8.7})]
   
#######################
### reduce func version
#######################
def warm_snow_day(acc, elem):
    """Reducer step: keep whichever of acc / elem is the warmer snow day.

    elem replaces acc only if it had snowfall ('snow' > 0) and a strictly
    higher 'tmax'; otherwise acc is carried forward unchanged.
    """
    if not elem['snow'] > 0:
        return acc
    if elem['tmax'] > acc['tmax']:
        return elem
    return acc

# Seed record for the reduction: no snow and a tmax of 0.
start_val = dict(
    date="1900-01-01",
    tmin=0,
    tmax=0,
    prcp=0.0,
    snow=0.0,
    snwd=0.0,
    awnd=0.0,
)

# Fold the whole dataset through warm_snow_day, starting from the seed.
result = reduce(warm_snow_day, weather_data, start_val)
print(f"{result['date']} with temp: {result['tmax']} and snowfall: {result['snow']}")
###output
# 2018-03-24 with temp: 50 and snowfall: 0.2

 

"""
 {'date': '2022-05-24',
  'tmin': 61,
  'tmax': 80,
  'prcp': 0.0,
  'snow': 0.0,
  'snwd': 0.0,
  'awnd': 8.3},
 {'date': '2022-05-31',
  'tmin': 68,
  'tmax': 92,
  'prcp': 0.0,
  'snow': 0.0,
  'snwd': 0.0,
  'awnd': 4.0}
"""

import os
import json
import pprint

file_path = os.path.join("rdu-weather-history.json")

with open(file_path, "r") as handle:
    weather_data = json.load(handle)

### Across all years: among the days with snow > 0.0, find the one with the
### highest tmax.
snow_days = [d for d in weather_data if d['snow'] > 0.0]
snow_tmax_date = max(snow_days, key=lambda d: d['tmax'])
pprint.pp(snow_tmax_date)

### Per year: among the days with snow > 0.0, find the one with the highest tmax.
snow_days_dict = {}
for d in weather_data:
    key = d['date'][0:4] # 2017, 2018, 2019, 2020, 2021, 2022
    if d['snow'] > 0.0:
        if key not in snow_days_dict:
            snow_days_dict[key] = []
        snow_days_dict[key].append(d)
# print(snow_days_dict['2017'])

# Reduce every year's list of snowy days to its warmest record.
snow_tmax_date_per_years = {
    year: max(wdata, key=lambda d: d['tmax'])
    for year, wdata in snow_days_dict.items()
}
sorted_snow_tmax_data_per_years = sorted(snow_tmax_date_per_years.items())
pprint.pp(sorted_snow_tmax_data_per_years)

###output
# {'date': '2018-03-24',
 # 'tmin': 27,
 # 'tmax': 50,
 # 'prcp': 0.91,
 # 'snow': 0.2,
 # 'snwd': 0.0,
 # 'awnd': 3.58}
# [('2017',
  # {'date': '2017-03-12',
   # 'tmin': 30,
   # 'tmax': 48,
   # 'prcp': 0.03,
   # 'snow': 0.3,
   # 'snwd': 0.0,
   # 'awnd': 4.92}),
 # ('2018',
  # {'date': '2018-03-24',
   # 'tmin': 27,
   # 'tmax': 50,
   # 'prcp': 0.91,
   # 'snow': 0.2,
   # 'snwd': 0.0,
   # 'awnd': 3.58}),
 # ('2020',
  # {'date': '2020-02-20',
   # 'tmin': 33,
   # 'tmax': 43,
   # 'prcp': 0.47,
   # 'snow': 2.5,
   # 'snwd': 0.0,
   # 'awnd': 6.49}),
 # ('2021',
  # {'date': '2021-01-28',
   # 'tmin': 29,
   # 'tmax': 40,
   # 'prcp': 0.36,
   # 'snow': 1.6,
   # 'snwd': 2.0,
   # 'awnd': 8.72}),
 # ('2022',
  # {'date': '2022-01-29',
   # 'tmin': 25,
   # 'tmax': 36,
   # 'prcp': 0.05,
   # 'snow': 0.39,
   # 'snwd': 0.0,
   # 'awnd': 8.7})]

 

import os
import json
import pprint

def get_snow_tmax_date(file_path=None):
    """Return the snowy day ('snow' > 0.0) with the highest 'tmax'.

    Args:
        file_path: JSON file to read. When omitted,
            2025-03-10/rdu-weather-history.json is loaded.

    Returns:
        The matching record dict (the first one if several share the maximum).

    Raises:
        ValueError: if the file contains no day with snowfall.
    """
    if file_path is None:
        file_path = os.path.join("2025-03-10", "rdu-weather-history.json")
    with open(file_path, encoding="utf-8") as file:
        weather_data = json.load(file)

    snow_days = [d for d in weather_data if d['snow'] > 0.0]
    return max(snow_days, key=lambda d: d['tmax'])

# Show the warmest day on which it snowed.
warmest_snow_day = get_snow_tmax_date()
pprint.pp(warmest_snow_day)

def get_snow_tmax_years(file_path=None):
    """Return, for each year, the snowy day ('snow' > 0.0) with the highest 'tmax'.

    Args:
        file_path: JSON file to read. When omitted,
            2025-03-10/rdu-weather-history.json is loaded.

    Returns:
        A list of (year, record) tuples sorted by year; years without any
        snowfall are absent.
    """
    if file_path is None:
        file_path = os.path.join("2025-03-10", "rdu-weather-history.json")
    with open(file_path, encoding="utf-8") as file:
        weather_data = json.load(file)

    # Group the snowy days by the year prefix of their date.
    snow_days_by_year = {}
    for day in weather_data:
        if day['snow'] > 0.0:
            snow_days_by_year.setdefault(day['date'][0:4], []).append(day)

    warmest_per_year = {
        year: max(days, key=lambda x: x['tmax'])
        for year, days in snow_days_by_year.items()
    }
    return sorted(warmest_per_year.items())

# Show the warmest snowy day of every year.
warmest_snow_days = get_snow_tmax_years()
pprint.pp(warmest_snow_days)

###output
# {'date': '2018-03-24',
#  'tmin': 27,
#  'tmax': 50,
#  'prcp': 0.91,
#  'snow': 0.2,
#  'snwd': 0.0,
#  'awnd': 3.58}
# [('2017',
#   {'date': '2017-03-12',
#    'tmin': 30,
#    'tmax': 48,
#    'prcp': 0.03,
#    'snow': 0.3,
#    'snwd': 0.0,
#    'awnd': 4.92}),
#  ('2018',
#   {'date': '2018-03-24',
#    'tmin': 27,
#    'tmax': 50,
#    'prcp': 0.91,
#    'snow': 0.2,
#    'snwd': 0.0,
#    'awnd': 3.58}),
#  ('2020',
#   {'date': '2020-02-20',
#    'tmin': 33,
#    'tmax': 43,
#    'prcp': 0.47,
#    'snow': 2.5,
#    'snwd': 0.0,
#    'awnd': 6.49}),
#  ('2021',
#   {'date': '2021-01-28',
#    'tmin': 29,
#    'tmax': 40,
#    'prcp': 0.36,
#    'snow': 1.6,
#    'snwd': 2.0,
#    'awnd': 8.72}),
#  ('2022',
#   {'date': '2022-01-29',
#    'tmin': 25,
#    'tmax': 36,
#    'prcp': 0.05,
#    'snow': 0.39,
#    'snwd': 0.0,
#    'awnd': 8.7})]

 

# {'date': '2017-01-03',
 # 'tmin': 47,
 # 'tmax': 56,
 # 'prcp': 0.31,
 # 'snow': 0.0,
 # 'snwd': 0.0,
 # 'awnd': 5.37}

import os
import json
import pprint
from itertools import groupby

file_path = os.path.join("C:/Users/.../2024-11-07", "history_weather.json")

with open(file_path, 'r') as handle:
    weather_data = json.load(handle)

### Manual preparation: take the records whose date contains "2022" and order
### them by precipitation so equal amounts sit next to each other.
year = sorted(
    (day for day in weather_data if "2022" in day['date']),
    key=lambda d: d['prcp'],
)

### Use groupby to get the days of a given year by how much precipitation happened
### groupby yields (key, iterator) pairs: the key can be used as is, but each
### group iterator has to be turned into a list before it is stored.
grouped = {}
for prcp_key, members in groupby(year, key=lambda d: d['prcp']):
    grouped[prcp_key] = list(members)
print(f"{len(grouped)} total precipitation groups.")
### we can iterate over the dictionary to list each group
for key, data in grouped.items():
    print(f"Precip: {key}, # days: {len(data)}, Days: {list(map(lambda d: d['date'], data))}")
###output
# 36 total precipitation groups.
# Precip: 0.0, # days: 104, Days: ['2022-01-27', '2022-02-09', '2022-02-19', ...]
# Precip: 0.01, # days: 5, Days: ['2022-05-08', '2022-05-16', '2022-02-25', '2022-05-19', '2022-05-26']
# Precip: 0.02, # days: 3, Days: ['2022-05-07', '2022-02-26', '2022-02-03']
# Precip: 0.03, # days: 1, Days: ['2022-04-15']
# Precip: 0.04, # days: 1, Days: ['2022-02-24']
# Precip: 0.05, # days: 3, Days: ['2022-01-29', '2022-04-07', '2022-03-10']
# Precip: 0.06, # days: 3, Days: ['2022-01-22', '2022-03-11', '2022-04-26']

 

"""
 {'date': '2022-05-24',
  'tmin': 61,
  'tmax': 80,
  'prcp': 0.0,
  'snow': 0.0,
  'snwd': 0.0,
  'awnd': 8.3},
 {'date': '2022-05-31',
  'tmin': 68,
  'tmax': 92,
  'prcp': 0.0,
  'snow': 0.0,
  'snwd': 0.0,
  'awnd': 4.0}
"""

import os
import json
import pprint
from itertools import groupby

file_path = os.path.join("rdu-weather-history.json")

with open(file_path, "r") as handle:
    weather_data = json.load(handle)

### Manual construction: the 2018 records ordered by precipitation.
year_2018 = sorted(
    (wd for wd in weather_data if "2018" in wd['date']),
    key=lambda d: d['prcp'],
)
# pprint.pp(year_2018)

###output
# [{'date': '2018-01-08',
  # 'tmin': 16,
  # 'tmax': 47,
  # 'prcp': 0.0,
  # 'snow': 0.0,
  # 'snwd': 0.0,
  # 'awnd': 5.37},
 # {'date': '2018-01-25',
  # 'tmin': 26,
  # 'tmax': 49,
  # 'prcp': 0.0,
  # 'snow': 0.0,
  # 'snwd': 0.0,
  # 'awnd': 3.13},

### Construction with groupby
# groupby only merges *consecutive* items with equal keys, so the records are
# sorted by the same key first. On unsorted input a key's later runs would
# overwrite its earlier ones in the dict and most days would be lost.
grouped = {
    key: list(data)
    for key, data in groupby(
        sorted(weather_data, key=lambda d: d['prcp']),
        key=lambda d: d['prcp'],
    )
}
sorted_group = sorted(grouped.items())
pprint.pp(sorted_group)

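###output (recorded with groupby applied to the unsorted weather_data, so each key shows only its last consecutive run)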
# [(0.0,
  # [{'date': '2022-05-09',
    # 'tmin': 44,
    # 'tmax': 71,
    # 'prcp': 0.0,
    # 'snow': 0.0,
    # 'snwd': 0.0,
    # 'awnd': 7.2},
   # {'date': '2022-05-10',
    # 'tmin': 48,
    # 'tmax': 75,
    # 'prcp': 0.0,
    # 'snow': 0.0,
    # 'snwd': 0.0,
    # 'awnd': 9.4},
   # {'date': '2022-05-13',
    # 'tmin': 63,
    # 'tmax': 78,
    # 'prcp': 0.0,
    # 'snow': 0.0,
    # 'snwd': 0.0,
    # 'awnd': 6.9},
   # {'date': '2022-05-15',
    # 'tmin': 61,
    # 'tmax': 86,
    # 'prcp': 0.0,
    # 'snow': 0.0,
    # 'snwd': 0.0,
    # 'awnd': 5.4},
   # {'date': '2022-05-17',
    # 'tmin': 53,
    # 'tmax': 84,
    # 'prcp': 0.0,
    # 'snow': 0.0,
    # 'snwd': 0.0,
    # 'awnd': 4.5},
   # {'date': '2022-05-18',
    # 'tmin': 55,
    # 'tmax': 87,
    # 'prcp': 0.0,
    # 'snow': 0.0,
    # 'snwd': 0.0,
    # 'awnd': 6.3},
   # {'date': '2022-05-20',
    # 'tmin': 66,
    # 'tmax': 96,
    # 'prcp': 0.0,
    # 'snow': 0.0,
    # 'snwd': 0.0,
    # 'awnd': 10.5},
   # {'date': '2022-05-24',
    # 'tmin': 61,
    # 'tmax': 80,
    # 'prcp': 0.0,
    # 'snow': 0.0,
    # 'snwd': 0.0,
    # 'awnd': 8.3},
   # {'date': '2022-05-31',
    # 'tmin': 68,
    # 'tmax': 92,
    # 'prcp': 0.0,
    # 'snow': 0.0,
    # 'snwd': 0.0,
    # 'awnd': 4.0}]),
 # (0.01,
  # [{'date': '2021-10-22',
    # 'tmin': 54,
    # 'tmax': 78,
    # 'prcp': 0.01,
    # 'snow': 0.0,
    # 'snwd': 0.0,
    # 'awnd': 5.82}]),

 

### Almost nobody likes having to work with dates and times.
### And there's a lot of reasons for this.
### Dealing with time zones and leap years and performing calculations on date and time values,
### these are not exactly high on the list of most developer's ideas of fun.

import os
import json
import pprint
from datetime import date

file_path = os.path.join("C:/Users/.../2024-11-07", "history_weather.json")

with open(file_path, 'r') as handle:
    weather_data = json.load(handle)

### The datetime module converts ISO-formatted strings into dates fairly easily
first_record = weather_data[0]
day = date.fromisoformat(first_record['date'])
# print(day)

def is_weekend_day(d):
    """Return True when record d's ISO 'date' falls on a Saturday or Sunday."""
    # date.weekday() numbers Monday as 0, so Saturday is 5 and Sunday is 6.
    return date.fromisoformat(d['date']).weekday() >= 5

# NOTE: despite their names, these variables hold weekend days, because the
# selection is done with is_weekend_day.
weekdays = [d for d in weather_data if is_weekend_day(d)]
warmest_weekday = max(weekdays, key=lambda d: d['tmax'])
warmest_date = date.fromisoformat(warmest_weekday['date'])
print(warmest_date.strftime('%Y %b %d, %a'))
###output
# 2017 Jul 23, Sun

 

# {'date': '2017-01-03',
 # 'tmin': 47,
 # 'tmax': 56,
 # 'prcp': 0.31,
 # 'snow': 0.0,
 # 'snwd': 0.0,
 # 'awnd': 5.37}

import os
import json
import pprint

file_path = os.path.join("C:/Users/.../2024-11-07", "history_weather.json")

# Parse the whole JSON document into Python objects.
with open(file_path, 'r') as handle:
    weather_data = json.load(handle)

### 조건에 맞는 것만 고르고 싶다 -> filter
### 다른 변환을 하고 싶다 -> map
### 모든 요소를 다 거치면서 평가하고 싶다 -> reduce

def get_miserable_day(wd):
    """Pick the record with the highest "miserable" score from wd.

    A day's score is (tmax * 0.8 + prcp * 10 + awnd) / 3, with a missing
    (None) 'awnd' counted as 0.

    Returns:
        A (record, score) tuple for the top-scoring day.
    """
    def miserable_score(day):
        wind = day['awnd']
        if wind is None:
            wind = 0
        temp = day['tmax'] * 0.8
        rain = day['prcp'] * 10
        return (temp+rain+wind)/3

    worst = max(wd, key=miserable_score)
    return (worst, miserable_score(worst))
    
# Show the top-scoring day together with its score.
most_miserable = get_miserable_day(weather_data)
pprint.pp(most_miserable)
###output
# ({'date': '2018-04-15',
  # 'tmin': 61,
  # 'tmax': 82,
  # 'prcp': 3.31,
  # 'snow': 0.0,
  # 'snwd': 0.0,
  # 'awnd': 12.53},
 # 37.076666666666675)

 

# {'date': '2017-01-03',
 # 'tmin': 47,
 # 'tmax': 56,
 # 'prcp': 0.31,
 # 'snow': 0.0,
 # 'snwd': 0.0,
 # 'awnd': 5.37}

import os
import json
import pprint

file_path = os.path.join("C:/Users/.../2024-11-07", "history_weather.json")

# Parse the whole JSON document into Python objects.
with open(file_path, 'r') as handle:
    weather_data = json.load(handle)

def get_miserable_day(wd):
    """Pick the record with the highest "miserable" score from wd.

    A day's score is (awnd + tmax * 0.8 + prcp * 10) / 3, with a missing
    (None) 'awnd' counted as 0.

    Returns:
        A (record, {'scor': score}) tuple for the top-scoring day; the key is
        spelled 'scor' in the returned dict.
    """
    def miserable_score(day):
        wind = day['awnd']
        if wind is None:
            wind = 0
        temp = day['tmax'] * 0.8
        rain = day['prcp'] * 10
        return (wind+temp+rain)/3

    worst = max(wd, key=miserable_score)
    return (worst, {'scor': miserable_score(worst)})

# Show the top-scoring day together with its score dict.
most_miserable = get_miserable_day(weather_data)
pprint.pp(most_miserable)
###output
# ({'date': '2018-04-15',
#   'tmin': 61,
#   'tmax': 82,
#   'prcp': 3.31,
#   'snow': 0.0,
#   'snwd': 0.0,
#   'awnd': 12.53},
#  {'scor': 37.076666666666675})

 

import random, os, json, pprint

# Ten distinct random integers drawn from 1..100.
rn = random.sample(range(1, 101), 10)
# print(rn)

file_path = os.path.join("C:/Users/.../2024-11-07", "history_weather.json")

with open(file_path, 'r') as handle:
    weather_data = json.load(handle)

### random.random() returns a float in [0.0, 1.0)
# print(random.random())

### random.randint(a, b) includes both end points
# print(random.randint(10, 20))

### random.randrange(a, b) excludes the end point
# print(random.randrange(10, 20))

### build a list of the summer days in 2019
def is_summer_day(day):
    """Return True when day's 'date' contains "2019-07-" or "2019-08-"."""
    return any(marker in day['date'] for marker in ("2019-07-", "2019-08-"))

summer_2019 = [day for day in weather_data if is_summer_day(day)]
# pprint.pp(summer_2019)

### choose 5 random days from that summer (sampling without replacement)
five_summers = random.sample(summer_2019, 5)
# pprint.pp(len(five_summers))

### what was the windiest of those 5 days?
windiest = max(five_summers, key=lambda d: d['awnd'])
pprint.pp(windiest)

###output
# {'date': '2019-08-07',
#  'tmin': 72,
#  'tmax': 91,
#  'prcp': 0.14,
#  'snow': 0.0,
#  'snwd': 0.0,
#  'awnd': 6.49}

 

 # ('2020',
  # {'date': '2020-02-20',
   # 'tmin': 33,
   # 'tmax': 43,
   # 'prcp': 0.47,
   # 'snow': 2.5,
   # 'snwd': 0.0,
   # 'awnd': 6.49}),

import random, os, json, pprint

file_path = os.path.join("C:/Users/SKTelecom/UserApps/PyScripter/MyPrj/2024-11-07", "history_weather.json")

# Parse the whole JSON document into Python objects.
with open(file_path, 'r') as handle:
    weather_data = json.load(handle)

def is_winter_days(day):
    """Return True when day's 'date' lies in Jan, Feb, Nov or Dec of 2019.

    The check is a substring match of the "2019-MM-" markers against the date.
    """
    for month_marker in ("2019-11-", "2019-12-", "2019-01-", "2019-02-"):
        if month_marker in day['date']:
            return True
    return False

winter_days = [day for day in weather_data if is_winter_days(day)]
# pprint.pp(winter_days)

# The coldest of those days is the one with the lowest 'tmin'.
coldest_winter_day = min(winter_days, key=lambda d: d['tmin'])
pprint.pp(coldest_winter_day)
###output
# {'date': '2019-01-21',
 # 'tmin': 17,
 # 'tmax': 31,
 # 'prcp': 0.0,
 # 'snow': 0.0,
 # 'snwd': 0.0,
 # 'awnd': 5.82}

 

반응형

관련글 더보기

댓글 영역