Collections Module

Python

by techbard 2025. 3. 31. 14:15

# The Counter collection class is specifically designed for counting.
# A Counter can be instantiated from an iterable (such as a list or a string) or
# a mapping (like a dictionary). Hashable objects are keys and their frequencies are values.

from collections import Counter

# Counter is a dict subclass, so the regular dictionary API is available on it.
print(f"Is the counter class a subclass of dict? {issubclass(Counter, dict)}")

# Initialize with a list: each distinct element becomes a key and
# its number of occurrences becomes the value.
letters = ["a", "b", "c", "a", "c", "a"]
letter_counter = Counter(letters)
print(f"Counter from a list:\t {letter_counter}")

# Initialize with a string: a string is an iterable of characters,
# so every character is counted individually.
strings = "banana"
string_counter= Counter(strings)
print(f"Counter from a string:\t {string_counter}")

# Provide initial counts of an existing group of objects.
# Equivalent keyword form: Counter(a=4, b=2, c=-1)
# Counts are not restricted to positive numbers, as "c" shows here.
dict_letter_counter = Counter({"a": 4, "b":2 , "c": -1})
print(f"Initialized Counter:\t {dict_letter_counter}")

# Output
Is the counter class a subclass of dict? True
Counter from a list:	 Counter({'a': 3, 'c': 2, 'b': 1})
Counter from a string:	 Counter({'a': 3, 'n': 2, 'b': 1})
Initialized Counter:	 Counter({'a': 4, 'b': 2, 'c': -1})

from collections import Counter

# Start from a list of string digits: "0" occurs twice, "1" once.
nums = ["0", "0", "1"]
num_counter = Counter(nums)

print(f"Original counter:\t\t\t{num_counter}")

# Update Counter with a string or a list.
# update() adds to the existing counts instead of replacing them;
# a string argument is treated as an iterable of characters.
num_counter.update("1")
print(f"Update counter with strings:\t\t{num_counter}")

num_counter.update(["0", "1"])
print(f"Update counter with list:\t\t{num_counter}")

# You can also subtract counts.
# subtract() modifies the counter in place; here a mapping of
# element -> amount to subtract is passed.
num_counter.subtract({"0": 1, "1": 1})
print(f"Subtracted counter:\t\t\t{num_counter}")

# Output
Original counter:			Counter({'0': 2, '1': 1})
Update counter with strings:		Counter({'0': 2, '1': 2})
Update counter with list:		Counter({'0': 3, '1': 3})
Subtracted counter:			Counter({'0': 2, '1': 2})

# Using Counter Methods
from collections import Counter

# Keyword arguments give the initial count for each element.
c = Counter(a=1, b=2, c=3)

# Reconstructing the original dataset, but without the original order:
# elements() yields each key repeated as many times as its count.
print(f"c.elements():\t\t{list(c.elements())}")

# Return a list of (element, count) pairs, from most to least common
print(f"c.most_common():\t{c.most_common()}")

# Return a list of most to least common elements (keys only, counts dropped)
most_common_elements = [k for k, _ in c.most_common()]
print(f"most to least elements:\t{most_common_elements}")

# Total of all counts from the Counter.
# NOTE: Counter.total() was added in Python 3.10.
print(f"c.total():\t\t{c.total()}")

# Output
c.elements():		['a', 'b', 'b', 'c', 'c', 'c']
c.most_common():	[('c', 3), ('b', 2), ('a', 1)]
most to least elements:	['c', 'b', 'a']
c.total():		6

# Word Frequency Count in Text
from collections import Counter

sentence = "How many times does each word show up in this sentence word word"
# split() with no arguments breaks the sentence on whitespace into a list of words.
word = sentence.split()
c = Counter(word)
# Show the three most frequent words as (word, count) pairs.
print(c.most_common(3))

# Output
[('word', 3), ('How', 1), ('many', 1)]

# Counting Items in Inventories
from collections import Counter

# Managing an inventory: each Counter maps an item name to its quantity
inventory_a = Counter(apples=1, oranges=1)
inventory_b = Counter(apples=1, oranges=1, bananas=1)

# Combine inventories: "+" adds the counts of matching items and
# returns a new Counter (the operands are left unchanged).
total_inventory = inventory_a + inventory_b
print("Total inventory:\t\t", total_inventory)

# Items added
add_items = Counter(bananas=1)
remaining_inventory = total_inventory + add_items
print("Inventory after the filled:\t", remaining_inventory)

# Item sold: "-" subtracts counts and returns a new Counter.
# The binary operators keep only items whose resulting count is positive.
sold_items = Counter(apples=1, oranges=1, bananas=1)
remaining_inventory = remaining_inventory - sold_items
print("Inventory after the sale:\t", remaining_inventory)

# Output
Total inventory:		 Counter({'apples': 2, 'oranges': 2, 'bananas': 1})
Inventory after the filled:	 Counter({'apples': 2, 'oranges': 2, 'bananas': 2})
Inventory after the sale:	 Counter({'apples': 1, 'oranges': 1, 'bananas': 1})

# As you can see, Named Tuples offer a lot of clarity to the code without the expense
# of using a lot more memory than a regular tuple.

# Creating Named Tuples
from collections import namedtuple

# Creating a new tuple subclass (namedtuple class).
# Field names are given here as one whitespace-separated string.
Pixel = namedtuple("Pixel", "red green blue")

# Using a namedtuple class to instantiate a new namedtuple object
pixel = Pixel(red=255, green=50, blue=0)
print(pixel)

# Get a tuple of field names
print(Pixel._fields)

# Accessing values by field names with the dot syntax.
print(f"pixel.red: {pixel.red}")
print(f"pixel.green: {pixel.green}")
print(f"pixel.blue: {pixel.blue}")

# Output
Pixel(red=255, green=50, blue=0)
('red', 'green', 'blue')
pixel.red: 255
pixel.green: 50
pixel.blue: 0

from collections import namedtuple

# Field names may also be given as a comma-separated string.
Pixel = namedtuple("Pixel", "red, green, blue")

# Raw pixel rows, each holding the red, green and blue values in that order.
image_pixel_data = [
    [255, 43, 22],
    [230, 44, 23],
    [230, 44, 23]
]

# _make() builds a Pixel from an existing iterable, so each raw row
# becomes a named record without unpacking it by hand.
sprite = [Pixel._make(pixel) for pixel in image_pixel_data]
print(sprite)

# output
[Pixel(red=255, green=43, blue=22), Pixel(red=230, green=44, blue=23), Pixel(red=230, green=44, blue=23)]

# namedtuple() Use Cases
# ======================

# Named tuples improve code readability by utilizing named fields.
# But creation time is a lot slower than with regular tuples.

# We could compare them to data classes and dictionaries.
# However, dictionaries and data classes (unless frozen) are mutable data structures.
# They also consume more memory than named tuples, because they use dictionaries to store data
# (a data class has a __dict__). Named tuples also offer better performance,
# especially compared to dictionaries.

# Named Tuples provide better readability.
# And they also make these related values immutable.

from collections import namedtuple

# Each City record bundles a name with its coordinates under readable field names.
City = namedtuple("City", ["name", "latitude", "longitude"])

# Module-level list of City records; find_city_by_name() below searches it.
cities = [
    City("New York", 40.7128, -74.0060),
    City("Los Angeles", 34.0522, -118.2437),
    City("Chicago", 41.8781, -87.6298),
]

def find_city_by_name(city_name):
    """Return the first record in the module-level ``cities`` list named ``city_name``.

    The comparison is an exact ``==`` match on the record's ``name`` field.
    Returns ``None`` when no record matches.
    """
    matches = (candidate for candidate in cities if candidate.name == city_name)
    # next() with a default stops at the first hit and yields None on exhaustion.
    return next(matches, None)

def get_cities_count(cities: list) -> int:
    """Return the number of records in ``cities``.

    ``cities`` is the list of city records, not a single named tuple.
    (``namedtuple`` is a factory function and is not usable as a type
    annotation.)
    """
    return len(cities)

# find_city_by_name() returns None when nothing matches, so test the result
# before reading its fields.
found_city = find_city_by_name("New York")
if found_city:
    # Fields are read by name instead of by positional index.
    print(f"The coordinates of {found_city.name} are {found_city.latitude}, {found_city.longitude}")
else:
    print("City not found")

# Number of records in the module-level cities list
print(get_cities_count(cities))

# Data can also be managed as class attributes (fields), i.e. with a data class.
# However, a data class uses an internal dictionary to store its data,
# and that dictionary consumes a lot of memory.

# output
The coordinates of New York are 40.7128, -74.006
3

# Working with Functions
# -> Reducing the Number of Function Parameters

from collections import namedtuple

# Use a parameter for each piece of customer data
# def process_customer_info(id, first_name, last_name, email, address, city, state, zip_code):

# Some complex logic here
# print(f"Processing {first_name} {last_name} living in {city}, {state}.")

# One field per parameter of the original function signature shown above;
# the fifth field is "address".
CustomerInfo = namedtuple("CustomerInfo", ["id", "first_name", "last_name", "email", "address", "city", "state", "zip_code"])
# Use the CustomerInfo namedtuple instead
def process_customer_info(customer_info):
    """Print a one-line processing message for a customer record.

    ``customer_info`` is a single record object; only its ``first_name``,
    ``last_name``, ``city`` and ``state`` attributes are read.
    """
    # One record parameter stands in for the separate per-field arguments.
    full_name = f"{customer_info.first_name} {customer_info.last_name}"
    location = f"{customer_info.city}, {customer_info.state}"
    print(f"Processing {full_name} living in {location}")

customer = CustomerInfo(1, "X", "Y", "x.y@example.com", "123 Elm St", "Anytown", "Anystate", "12345")
process_customer_info(customer)

# output
Processing X Y living in Anytown, Anystate

# Return a Named Tuple from a Function
# ====================================
# If you remember, tuples allow us to act as if we can
# return multiple values from a function.
# Named Tuples can take this one step further and give a name
# to each returned piece of data.

# As you can see, Named Tuples offer a lot of clarity to the code without the expense
# of using a lot more memory than a regular tuple.

from collections import namedtuple

# Define the FinancialStats namedtuple
FinancialStats = namedtuple("FinancialStats", ["average_expense", "total_expense", "highest_expense"])

def calculate_financial_stats(expenses: list) -> FinancialStats:
    """Summarize a list of expense amounts.

    Args:
        expenses: Non-empty list of numeric expense amounts.

    Returns:
        A ``FinancialStats`` record holding the average, the total and the
        highest of the given amounts.

    Raises:
        ValueError: If ``expenses`` is empty; an average and a maximum are
            undefined without at least one value.
    """
    # Fail with a clear message instead of letting the division below
    # raise ZeroDivisionError on empty input.
    if len(expenses) == 0:
        raise ValueError("expenses must contain at least one value")
    total_expense = sum(expenses)
    average_expense = total_expense / len(expenses)
    highest_expense = max(expenses)
    return FinancialStats(average_expense, total_expense, highest_expense)

# Usage
expenses = [250, 320, 150, 400, 500]
stats = calculate_financial_stats(expenses)
print(f"Average Expense: ${stats.average_expense}")
print(f"Highest Expense: ${stats.highest_expense}")
print(f"Total Expense: ${stats.total_expense}")

# output
Average Expense: $324.0
Highest Expense: $500
Total Expense: $1620

from collections import ChainMap
import os

# Default configuration
default_config = {"theme": "Default", "language": "English", "show_ads": True}

# Environment variables can override default settings
env_config = os.environ

# Chain them: lookups search env_config first and fall back to default_config
app_config = ChainMap(env_config, default_config)

# Access the theme setting
print("Theme:", app_config["theme"])

# Access the environment setting.
# The "OS" variable is typically defined only on Windows, so use .get() with a
# fallback; plain indexing raises KeyError when no mapping in the chain has the key.
print(app_config.get("OS", "OS environment variable is not set"))

# User input has the highest priority, followed by environment variables, then defaults.
# new_child() returns a new ChainMap with user_config placed in front of the existing maps.
user_config = {"theme": "Dark Mode", "show_ads": False}
app_config = app_config.new_child(user_config)

# Accessing a setting
print("\nAfter adding the user config")
print("Theme:", app_config["theme"])
print("Language:", app_config["language"])
print("Show Ads:", app_config["show_ads"])

# output
Theme: Default
Windows_NT

After adding the user config
Theme: Dark Mode
Language: English
Show Ads: False

# deque
# =====
# Deque (short for "double-ended queue") is a generalization of stacks and queues.
# A deque supports list-like methods, but it can append and pop elements from both sides.

# If you're not familiar with that kind of a data structure,
# it is basically a doubly linked list: each node contains a reference
# to the next and the previous node in the list.
# (In CPython each node is a fixed-size block of elements rather than a single element.)

# This allows us to append and pop elements at both ends of the queue efficiently,
# because a deque is not allocated as a single contiguous chunk of memory.

# Each node can stand in memory on its own
# because it has a reference to the next and the previous node.

from collections import deque

# Create an empty deque
dq = deque()
print(dq)

# Initialize deque with an iterable; the elements keep their iteration order
dq = deque((1, 2, 3)) # tuple
print(dq)

dq = deque([1, 2, 3]) # list
print(dq)

# items() yields (key, value) pairs, so the deque holds 2-tuples
some_dict = {"1": "apple", "2": "banana", "3": "mango"}
dq = deque(some_dict.items()) # dictionary view object
print(dq)

# output
deque([])
deque([1, 2, 3])
deque([1, 2, 3])
deque([('1', 'apple'), ('2', 'banana'), ('3', 'mango')])

# Appending and Popping Elements from Both Sides

from collections import deque

# Append elements from both sides:
# append() adds on the right, appendleft() adds on the left
dq = deque([1])
dq.append(2)
dq.appendleft(0)
print(dq)

# Pop elements from both sides:
# pop() removes and returns the rightmost element
current_right_popped_el = dq.pop()
print("Popped element from the right:", current_right_popped_el)

# popleft() removes and returns the leftmost element
current_left_popped_el = dq.popleft()
print("Popped element from the left:", current_left_popped_el)
print(dq)

# A deque is similar to a list, but a list has to re-index (shift) its elements on removal,
# whereas a deque only updates references, so it performs better than a list here.
# (A deque is much faster than a list for appends and pops at the ends.
# However, the cost of this is that deques are much slower at accessing random elements,
# which is an O(1) operation in a list.)

# output
deque([0, 1, 2])
Popped element from the right: 2
Popped element from the left: 0
deque([1])

# Creating a Bounded deque
# A deque with a limit on the number of elements
# ========================

from collections import deque

# maxlen caps the deque at 5 elements; it starts with only 4 (0..3).
nums = deque(range(0, 4), maxlen=5)
# NOTE: the label says "len", but the value printed is maxlen (the capacity),
# not the current number of elements.
print("deque's len:", nums.maxlen)
print(nums)

# Allowed, because the original deque has just 4 elements
nums.appendleft(-1)
print("After nums.appendleft(-1):\t", nums)

# The deque is now full: appending on the right discards the leftmost
# element, which is -1
nums.append(4)
print("After nums.append(4):\t\t", nums)

# This will discard the first (leftmost) number 0
nums.append(5)
print("After nums.append(5):\t\t", nums)

# Appending on the left of a full deque discards from the opposite end:
# this will discard the last (rightmost) number 5
nums.appendleft(0)
print("After nums.appendleft(0):\t", nums)

# output
deque's len: 5
deque([0, 1, 2, 3], maxlen=5)
After nums.appendleft(-1):	 deque([-1, 0, 1, 2, 3], maxlen=5)
After nums.append(4):		 deque([0, 1, 2, 3, 4], maxlen=5)
After nums.append(5):		 deque([1, 2, 3, 4, 5], maxlen=5)
After nums.appendleft(0):	 deque([0, 1, 2, 3, 4], maxlen=5)

# Using Special deque Methods

from collections import deque

nums = deque([0, 0, 1])

# Rotate elements one step to the right (the default step count);
# the rightmost element wraps around to the front
nums.rotate()
print("nums.rotate():\t", nums)

# Rotate elements 3 steps to the right (in-place).
# With exactly 3 elements this is a full turn, so the order is unchanged.
nums.rotate(3)
print("nums.rotate(3):\t", nums)

# output
nums.rotate():	 deque([1, 0, 0])
nums.rotate(3):	 deque([1, 0, 0])

# UserDict is the only one of these three user classes that
# actually offers some useful functionality on top of a regular dictionary.

# UserDict is a wrapper around the built-in dict class.
# The contents of the UserDict are stored in a real dict object which
# you can access through the data attribute.

# Modifying the Standard Dictionary Features

from collections import UserDict

class StringDict(UserDict):
    """Dictionary that only accepts ``str`` values.

    ``UserDict`` routes construction from a mapping and ``update()`` through
    ``__setitem__``, so overriding this one method validates every way of
    storing a value.
    """

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``.

        Raises:
            TypeError: If ``value`` is not a ``str``.
        """
        # Reject non-string values before they reach the underlying dict.
        if not isinstance(value, str):
            raise TypeError("Value must be of type string.")
        super().__setitem__(key, value)

# This raises TypeError.
# sd = StringDict({"a": 1})
# sd["b"] = 2

# This works well.
sd = StringDict({"a": "1"})
print(sd)

sd["b"] = "2"
print(sd)

sd.update({"c": "3"})
print(sd)

# output
{'a': '1'}
{'a': '1', 'b': '2'}
{'a': '1', 'b': '2', 'c': '3'}

from collections import defaultdict
import random

# Ten distinct two-digit numbers
random_numbers = random.sample(range(10, 100), 10)
# Missing keys start out as an empty list, so no existence check is needed
group_number = defaultdict(list)

# Bucket every number under its leading (tens) digit, kept as a string key
for num in random_numbers:
    group_number[str(num)[0]].append(num)

# Put a "sum: N" label in front of each bucket; the sum is computed
# before the label is inserted, so it covers the numbers only
for bucket in group_number.values():
    bucket.insert(0, f"sum: {sum(bucket)}")

print(group_number)

# output
defaultdict(<class 'list'>, {'7': ['sum: 157', 78, 79], '2': ['sum: 46', 25, 21], '9': ['sum: 99', 99], '5': ['sum: 52', 52], '4': ['sum: 44', 44], '8': ['sum: 88', 88], '6': ['sum: 61', 61], '3': ['sum: 32', 32]})

# list enumerate: yields (index, element) pairs, with the index starting at 0
my_list = ["apple", "banana", "cherry", "durian"]
for index, value in enumerate(my_list):
    # end=" " keeps all pairs on one output line
    print(index, value, end=" ")

# Ends the current output line and leaves a blank line after it
print("\n")

# dict enumerate: iterating a dict yields its keys, so each pair is
# (index, key); the values are not visited
my_dict = {"apple": 10, "banana": 20, "cherry": 30, "durian": 40}
for index, key in enumerate(my_dict):
    print(index, key, end=" ")

# output
0 apple 1 banana 2 cherry 3 durian 

0 apple 1 banana 2 cherry 3 durian