Module - path, urllib, json, html, xml, random

Python

by techbard 2024. 11. 24. 19:35

path module

# path module

import os
import shutil
from os import path

# Raw strings keep the Windows backslashes literal. In a normal string the
# sequences \w and \e are invalid escapes (SyntaxWarning on Python 3.12+).
WINDOWS_DIR = r'c:\windows'
EXPLORER_EXE = r'c:\windows\explorer.exe'

# os.name identifies the platform family of the running interpreter.
print(os.name)
print(path.exists(WINDOWS_DIR))
print(path.isdir(WINDOWS_DIR))
print(path.isfile(EXPLORER_EXE))

if path.exists(EXPLORER_EXE):
    # Resolve to the canonical path before taking it apart.
    src = path.realpath(EXPLORER_EXE)

    print(src)

    # split() separates the directory part from the final path component.
    head, tail = path.split(src)
    print('path:', head)
    print('file:', tail)

결과)

nt
True
True
True
c:\windows\explorer.exe
path: c:\windows
file: explorer.exe

urllib module

# HTTP 에러코드 얻기

import urllib.request

req = urllib.request.Request('http://www.pretend_server.org')
try:
    # The context manager closes the response if the request unexpectedly
    # succeeds; the timeout keeps the demo from blocking indefinitely.
    with urllib.request.urlopen(req, timeout=10):
        pass
except urllib.error.URLError as e:
    # URLError.reason describes why the request could not be completed.
    print(e.reason)

결과)

[Errno 11004] getaddrinfo failed

json module

# json 데이터 읽기

import urllib.request
import urllib
import json


def printResule(data):
    """Print a summary of an earthquake GeoJSON feed.

    Args:
        data: Raw response body as bytes; it is decoded and parsed as JSON.

    Prints the feed title and event count when the metadata has them, then
    every event with magnitude >= 4.0, a blank line, and finally every event
    with at least one "felt" report.

    Raises:
        KeyError: If the document has no 'metadata' object or an event lacks
            the expected 'properties' fields.
    """
    theJSON = json.loads(data.decode())
    metadata = theJSON['metadata']

    if 'title' in metadata:
        print(metadata['title'])

    if 'count' in metadata:
        count = metadata['count']
        print(str(count), 'events recorded.')

    # A missing 'features' key is treated as "no events" for both listings.
    features = theJSON.get('features', [])

    for i in features:
        if i['properties']['mag'] >= 4.0:
            print('%2.1f' % i['properties']['mag'], i['properties']['place'])

    print()

    for i in features:
        feltReports = i['properties']['felt']
        # 'felt' can be null, so rule out None before comparing numbers.
        if feltReports is not None and feltReports > 0:
            print('%2.1f' % i['properties']['mag'], i['properties']['place'],
                  'reported', str(feltReports), 'times')
urlData = 'http://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_day.geojson'

# The context manager closes the HTTP response once it has been consumed.
with urllib.request.urlopen(urlData) as webUrl:
    print('Server return code:', webUrl.getcode())

    if webUrl.getcode() == 200:
        data = webUrl.read()
        printResule(data)
    else:
        print('Received an error from server: ', str(webUrl.getcode()))
결과)
Server return code: 200
USGS Magnitude 2.5+ Earthquakes, Past Day
36 events recorded.
4.9 43km E of Nishinoomote, Japan
4.7 160km SSW of Suva, Fiji
4.3 46km ESE of Curico, Chile
4.0 93km SE of Yunaska Island, Alaska
4.2 171km NNW of Saumlaki, Indonesia
4.7 54km W of Bamboo Flat, India
5.0 104km SSW of Banda Aceh, Indonesia
5.3 98km SSW of Taron, Papua New Guinea
4.8 Northern Mid-Atlantic Ridge
4.9 Northern Mid-Atlantic Ridge
4.1 41km N of Murghob, Tajikistan
5.7 87km SW of Hihifo, Tonga
5.3 82km WSW of Khuzdar, Pakistan

3.1 14km NE of Cherokee, Oklahoma reported 1 times
2.5 9km SSE of Gilroy, California reported 1 times
2.5 25km SE of Shingletown, California reported 4 times
3.2 47km NW of Lordsburg, New Mexico reported 8 times
3.1 38km N of Yucca Valley, California reported 2 times
4.7 160km SSW of Suva, Fiji reported 1 times
2.9 109km N of Dorado, Puerto Rico reported 1 times
2.8 8km NW of Healdton, Oklahoma reported 1 times
2.7 28km ENE of Pablo, Montana reported 1 times

2.9 15km SW of Cherokee, Oklahoma reported 1 times
* json 데이터 포맷: http://earthquake.usgs.gov/earthquakes/feed/v1.0/geojson.php

html module

# html module

: samplehtml.html

<!DOCTYPE html>

<html lang="en">

<head>

<meta charset="utf-8" />

<title>Sample HTML Document</title>

<meta name="description" content="This is a sample HTML file" />

<meta name="author" content="Administrator" />

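<meta name="viewport" content="width=device-width; initial-scale=1.0" />

<!-- Replace favicon.ico & apple-touch-icon.png in the root of your domain and delete these references -->

<link rel="shortcut icon" href="/favicon.ico" />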

<link rel="apple-touch-icon" href="/apple-touch-icon.png" />

</head>

<body>

<div>

<header>

<h1>HTML Sample File</h1>

</header>

<nav>

<p>

<a href="/">Home</a>

</p>

<p>

<a href="/contact">Contact</a>

</p>

</nav>

<div>

</div>

<footer>

<p>© Copyright by Administrator</p>

</footer>

</div>

</body>

</html>

# html 파일 파싱

from html.parser import HTMLParser

metacount = 0


class MyHTMLParser(HTMLParser):
    """HTMLParser subclass that reports every parse event and its position.

    Each overridden handler prints what it encountered, followed by the
    (line, offset) pair returned by ``getpos()``. Start tags named ``meta``
    are also counted in the module-level ``metacount`` variable.
    """

    def handle_starttag(self, tag, attrs):
        """Report a start tag, count ``meta`` tags and list the attributes.

        Args:
            tag: Name of the tag that was opened.
            attrs: Sequence of ``(name, value)`` attribute pairs.
        """
        global metacount

        print('Encounted a start tag', tag)
        if tag == 'meta':
            metacount += 1

        pos = self.getpos()
        print('At line:', pos[0], 'position', pos[1])

        if len(attrs) > 0:
            print('\tAttributes:')
            for a in attrs:
                print('\t', a[0], '=', a[1])

    def handle_endtag(self, data):
        """Report an end tag; ``data`` receives the tag name."""
        print('Encounted an end tag:', data)
        pos = self.getpos()
        print('At line:', pos[0], 'position', pos[1])

    def handle_data(self, data):
        """Report a run of text found between tags."""
        print('Encounted some data:', data)
        pos = self.getpos()
        print('At line:', pos[0], 'position', pos[1])

    def handle_comment(self, data):
        """Report the content of an HTML comment."""
        print('Encounted comment:', data)
        pos = self.getpos()
        print('At line:', pos[0], 'position', pos[1])

parser = MyHTMLParser()

# The sample declares charset utf-8, so decode it explicitly instead of
# relying on the platform default; the with-block also closes the file.
with open('samplehtml.html', 'r', encoding='utf-8') as f:
    contents = f.read()

parser.feed(contents)

print('%d meta tags encounted' % metacount)

결과)

Encounted some data:

At line: 1 position 15

Encounted a start tag html

At line: 2 position 0

Attributes:

lang = en

Encounted some data:



At line: 2 position 16

Encounted a start tag head

At line: 3 position 2

Encounted some data:



At line: 3 position 8

Encounted a start tag meta

At line: 4 position 4

Attributes:

charset = utf-8

Encounted an end tag: meta

At line: 4 position 4

Encounted some data:



At line: 4 position 28

Encounted a start tag title

At line: 5 position 4

Encounted some data: Sample HTML Document

At line: 5 position 11

Encounted an end tag: title

At line: 5 position 31

Encounted some data:



At line: 5 position 39

Encounted a start tag meta

At line: 6 position 4

Attributes:

name = description

content = This is a sample HTML file

Encounted an end tag: meta

At line: 6 position 4

Encounted some data:



At line: 6 position 68

Encounted a start tag meta

At line: 7 position 4

Attributes:

name = author

content = Administrator

Encounted an end tag: meta

At line: 7 position 4

Encounted some data:



At line: 7 position 50

Encounted a start tag meta

At line: 8 position 4

Attributes:

name = viewport

content = width=device-width; initial-scale=1.0

Encounted an end tag: meta

At line: 8 position 4

Encounted some data:



At line: 8 position 76

Encounted comment: Replace favicon.ico & apple-touch-icon.png in the root of your domain and delete these references

At line: 9 position 4

Encounted some data:



At line: 9 position 110

Encounted a start tag link

At line: 10 position 4

Attributes:

rel = shortcut icon

href = /favicon.ico

Encounted an end tag: link

At line: 10 position 4

Encounted some data:



At line: 10 position 52

Encounted a start tag link

At line: 11 position 4

Attributes:

rel = apple-touch-icon

href = /apple-touch-icon.png

Encounted an end tag: link

At line: 11 position 4

Encounted some data:



At line: 11 position 64

Encounted an end tag: head

At line: 12 position 2

Encounted some data:



At line: 12 position 9

Encounted a start tag body

At line: 14 position 2

Encounted some data:



At line: 14 position 8

Encounted a start tag div

At line: 15 position 4

Encounted some data:



At line: 15 position 9

Encounted a start tag header

At line: 16 position 6

Encounted some data:



At line: 16 position 14

Encounted a start tag h1

At line: 17 position 8

Encounted some data: HTML Sample File

At line: 17 position 12

Encounted an end tag: h1

At line: 17 position 28

Encounted some data:



At line: 17 position 33

Encounted an end tag: header

At line: 18 position 6

Encounted some data:



At line: 18 position 15

Encounted a start tag nav

At line: 19 position 6

Encounted some data:



At line: 19 position 11

Encounted a start tag p

At line: 20 position 8

Encounted some data:



At line: 20 position 11

Encounted a start tag a

At line: 21 position 10

Attributes:

href = /

Encounted some data: Home

At line: 21 position 22

Encounted an end tag: a

At line: 21 position 26

Encounted some data:



At line: 21 position 30

Encounted an end tag: p

At line: 22 position 8

Encounted some data:



At line: 22 position 12

Encounted a start tag p

At line: 23 position 8

Encounted some data:



At line: 23 position 11

Encounted a start tag a

At line: 24 position 10

Attributes:

href = /contact

Encounted some data: Contact

At line: 24 position 29

Encounted an end tag: a

At line: 24 position 36

Encounted some data:



At line: 24 position 40

Encounted an end tag: p

At line: 25 position 8

Encounted some data:



At line: 25 position 12

Encounted an end tag: nav

At line: 26 position 6

Encounted some data:



At line: 26 position 12

Encounted a start tag div

At line: 27 position 6

Encounted some data:



At line: 27 position 11

Encounted an end tag: div

At line: 29 position 6

Encounted some data:



At line: 29 position 12

Encounted a start tag footer

At line: 30 position 6

Encounted some data:



At line: 30 position 14

Encounted a start tag p

At line: 31 position 8

Encounted some data: Copyright by Administrator

At line: 31 position 17

Encounted an end tag: p

At line: 31 position 44

Encounted some data:



At line: 31 position 48

Encounted an end tag: footer

At line: 32 position 6

Encounted some data:



At line: 32 position 15

Encounted an end tag: div

At line: 33 position 4

Encounted some data:



At line: 33 position 10

Encounted an end tag: body

At line: 34 position 2

Encounted some data:

At line: 34 position 9

Encounted an end tag: html

At line: 35 position 0

Encounted some data:

At line: 35 position 7

4 meta tags encounted

xml module

# xml 데이터 읽기

: samplexml.xml

<?xml version="1.0" encoding="UTF-8" ?>

<person>

<firstname>Joe</firstname>

<lastname>Marini</lastname>

<home>San Francisco</home>

<skill name="JavaScript"/>

<skill name="Python"/>

<skill name="C#"/>

<skill name="HTML"/>

</person>

# xml 파일파싱

import xml.dom.minidom

doc = xml.dom.minidom.parse('samplexml.xml')

print(doc.nodeName)

# documentElement is always the root element, whereas firstChild would be a
# comment or processing instruction if one preceded the root.
root = doc.documentElement
print(root.tagName)

skills = doc.getElementsByTagName('skill')
print('%d skills:' % skills.length)
for skill in skills:
    print(skill.getAttribute('name'))

# Create a new <skill name="jQuery"/> element and append it to the root.
newSkill = doc.createElement('skill')
newSkill.setAttribute('name', 'jQuery')
root.appendChild(newSkill)

print()

# Query again so the listing includes the element that was just added.
skills = doc.getElementsByTagName('skill')
print('%d skills:' % skills.length)
for skill in skills:
    print(skill.getAttribute('name'))

결과)

#document

person

4 skills:

JavaScript

Python

C#

HTML

5 skills:

JavaScript

Python

C#

HTML

jQuery

# REST API
# Representational State Transfer
# REST API just means a website that produces data
# intended for another computer program to consume,
# rather than something intended to be displayed
# to people in a browser.
# A REST API will respond to requests in a particular format and
# some things are common across many APIs about the format,
# but some things are specific to the particular API.
#
# {protocol}://{server}/{arguments}
#
# HTTP behind the scenes
#
# Path name
#
# Any web server gets to decide what format or arguments
# it is willing to accept.
#
# base URL
# https://itunes.apple.com/search
# ?term=Ann+Arbor&entity=podcast
#
# ?
#  key=value pairs.
# & <- separates the two pairs.
#
# ==> It's the basic structure of a request to a REST API.
#
import requests # pip install requests
import json

# Without a timeout, requests waits indefinitely for a server that never answers.
page = requests.get("https://api.datamuse.com/words?rel_rhy=funny", timeout=10)
print(type(page))
print(page.text[:150])  # print the first 150 characters
print(page.url)  # print the url that was fetched
print("------")
x = page.json()  # turn page.text into a python object
print(type(x))
print("---first item in the list---")
print(x[0])
print("---the whole list, pretty printed---")
print(json.dumps(x, indent=2))  # pretty print the results

# 결과
<class 'requests.models.Response'>
[{"word":"money","score":4415,"numSyllables":2},{"word":"honey","score":1206,"numSyllables":2},{"word":"sunny","score":717,"numSyllables":2},{"word":"
https://api.datamuse.com/words?rel_rhy=funny
------
<class 'list'>
---first item in the list---
{'word': 'money', 'score': 4415, 'numSyllables': 2}
---the whole list, pretty printed---
[
  {
    "word": "money",
    "score": 4415,
    "numSyllables": 2
  },
  {
    "word": "honey",
    "score": 1206,
    "numSyllables": 2
  },
  ...

import requests

# requests encodes the params dict into the URL query string.
kval_pairs = {'rel_rhy': 'funny'}
# The timeout stops the call from blocking forever on an unresponsive server.
page = requests.get("https://api.datamuse.com/words", params=kval_pairs, timeout=10)
print(page.text[:150])
print(page.url)

print("")

# Values with quotes and spaces are percent-encoded in the final URL.
d = {'q': '"violins and guitars"', 'tbm': 'isch'}
results = requests.get("https://google.com/search", params=d, timeout=10)
print(results.url)

# 결과
[{"word":"money","score":4415,"numSyllables":2},{"word":"honey","score":1206,"numSyllables":2},{"word":"sunny","score":717,"numSyllables":2},{"word":"
https://api.datamuse.com/words?rel_rhy=funny

https://www.google.com/search?q=%22violins+and+guitars%22&tbm=isch

# The answer is that you use the documentation for the API.
# https://www.datamuse.com/api/

import requests

def get_rhymes(word: str) -> list:
    """Return up to three words that rhyme with *word*, via the Datamuse API.

    Args:
        word: The word to find rhymes for; sent as the ``rel_rhy`` parameter.

    Returns:
        The ``'word'`` value of each entry in the JSON response.
    """
    baseUrl = "https://api.datamuse.com/words"
    params_dict = {"rel_rhy": word, "max": 3}
    # The timeout stops the call from blocking forever on an unresponsive server.
    resp = requests.get(baseUrl, params=params_dict, timeout=10)
    word_ds = resp.json()  # parse the JSON body into Python objects

    # Keep only the word itself; returning word_ds would give the full dicts.
    return [d['word'] for d in word_ds]


print(get_rhymes("funny"))
print(get_rhymes("dash"))

# 결과
['money', 'honey', 'sunny']
['cache', 'flash', 'ash']

# What is a module?
# - All Python code is stored in files on your machine
# - These files containing Python code are called modules
# - You import a module to get access to all the functions, types, and other objects
#   within the module

# Import the choice function from the random module

from random import choice
from random import choices, sample, shuffle

states = ['TX', 'AZ', 'CA', 'FL', 'OK', 'VT']

# choice(): a single randomly picked element.
selection = choice(states)
print(selection)

# choices(): k picks in which the same element may appear more than once.
selections = choices(states, k=3)
print(selections)

# sample(): k distinct positions of the list.
ret = sample(states, k=3)
print(ret)

# shuffle() reorders the list in place, so print it before and after.
print(f"Before shuffle: {states}")
shuffle(states)
print(f"After shuffle: {states}")

# 결과
['CA', 'OK', 'CA']
['CA', 'AZ', 'VT']
Before shuffle: ['TX', 'AZ', 'CA', 'FL', 'OK', 'VT']
After shuffle: ['TX', 'AZ', 'FL', 'OK', 'VT', 'CA']

# Alias names when importing with as
# - Alias: use a different variable name
# - Can alias an entire module or a single name

import statistics as st

# Sample data for the aliased statistics module.
a_list = [1, 5, 10, 3, 1]

# `st` is the alias bound by `import statistics as st`.
mean_value = st.mean(data=a_list)
print(mean_value)

# 결과
4

import re

s = """Please inform John (john@domail1.com) and Penelope
    (penelope@domain2.io) as well as Eleni (eleni@domain3.edu) 
    that baseball practice is cancelled for tomorrow."""

print(s)

# Three capture groups: the part before '@', then the two dot-separated parts.
pattern = r"(\w+)@(\w+)\.(\w+)"
# findall() yields one tuple of the three groups per match.
rst = re.compile(pattern).findall(s)

print(rst)
# Output:
[('john', 'domail1', 'com'), ('penelope', 'domain2', 'io'), ('eleni', 'domain3', 'edu')]

import datetime

# A calendar date without a time component.
d = datetime.date(year=2024, month=11, day=11)
print(d.year)
print(d.weekday())
print(d.isoformat())

# A time of day without a date.
t = datetime.time(hour=10, minute=57, second=33)
print(t.second)
print(t.isoformat())

# Date and time combined, including microseconds.
dt = datetime.datetime(
    year=2024, month=11, day=11,
    hour=10, minute=57, second=33, microsecond=857_000,
)
print(dt.isoformat())

# A duration; adding it to a datetime produces a new datetime.
td = datetime.timedelta(days=7, hours=17, seconds=55)
print(td)
print(td.total_seconds())
print(dt + td)

# 결과
2024
0
2024-11-11
33
10:57:33
2024-11-11T10:57:33.857000
7 days, 17:00:55
666055.0
2024-11-19 03:58:28.857000

import calendar

# Print the type of the object bound to the imported name `calendar`.
print(type(calendar))
# Disabled call; presumably the source of the month grid shown below.
# calendar.prmonth(2024, 11)

# Output:
    November 2024
 Mo Tu We Th Fr Sa Su
              1  2  3
  4  5  6  7  8  9 10
 11 12 13 14 15 16 17
 18 19 20 21 22 23 24
 25 26 27 28 29 30

# Calendar with its default first weekday (0).
cal = calendar.Calendar(firstweekday=0)
# Weeks of the month as lists of seven date objects each; the first and last
# week are padded with days from the neighbouring months.
dd = cal.monthdatescalendar(year=2024, month=11)
print(dd)

[[datetime.date(2024, 10, 28), datetime.date(2024, 10, 29), datetime.date(2024, 10, 30), datetime.date(2024, 10, 31), datetime.date(2024, 11, 1), datetime.date(2024, 11, 2), datetime.date(2024, 11, 3)], [datetime.date(2024, 11, 4), datetime.date(2024, 11, 5), datetime.date(2024, 11, 6), datetime.date(2024, 11, 7), datetime.date(2024, 11, 8), datetime.date(2024, 11, 9), datetime.date(2024, 11, 10)], [datetime.date(2024, 11, 11), datetime.date(2024, 11, 12), datetime.date(2024, 11, 13), datetime.date(2024, 11, 14), datetime.date(2024, 11, 15), datetime.date(2024, 11, 16), datetime.date(2024, 11, 17)], [datetime.date(2024, 11, 18), datetime.date(2024, 11, 19), datetime.date(2024, 11, 20), datetime.date(2024, 11, 21), datetime.date(2024, 11, 22), datetime.date(2024, 11, 23), datetime.date(2024, 11, 24)], [datetime.date(2024, 11, 25), datetime.date(2024, 11, 26), datetime.date(2024, 11, 27), datetime.date(2024, 11, 28), datetime.date(2024, 11, 29), datetime.date(2024, 11, 30), datetime.date(2024, 12, 1)]]

import time

# time.time() returns the current time as seconds since the Unix epoch.
number_of_seconds_since_Unix_epoch = time.time()
print(number_of_seconds_since_Unix_epoch)

delay = 5
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()
while delay > 0:
    print(f"{delay} seconds to launch.")
    delay -= 1
    time.sleep(1)
print("Launch!")
end_time = time.perf_counter()
elapsed_time = end_time - start_time
print(f"The countdown took {elapsed_time:.7f} seconds")

# 결과
1732438659.781187
5 seconds to launch.
4 seconds to launch.
3 seconds to launch.
2 seconds to launch.
1 seconds to launch.
Launch!
The countdown took 5.0037036 seconds

# Count occurrences of letters in a string

from collections import Counter

string = 'peter piper picked a peck of pickled peppers'

# Manual tally: start each unseen character at 0 and add one per occurrence.
d = {}
for char in string:
    d[char] = d.get(char, 0) + 1
print(d)

# The same tally with Counter, which can also rank the most frequent items.
ct = Counter(string)
print(ct)
mc = ct.most_common(3)
print(mc)

# 결과
{'p': 9, 'e': 8, 't': 1, 'r': 3, ' ': 7, 'i': 3, 'c': 3, 'k': 3, 'd': 2, 'a': 1, 'o': 1, 'f': 1, 'l': 1, 's': 1}
Counter({'p': 9, 'e': 8, ' ': 7, 'r': 3, 'i': 3, 'c': 3, 'k': 3, 'd': 2, 't': 1, 'a': 1, 'o': 1, 'f': 1, 'l': 1, 's': 1})
[('p', 9), ('e', 8), (' ', 7)]

# The copy module

# Plain assignment binds a second name to the same list object.
a = [1, 2, 3]
b = a
print(f"a is b: {a is b}")

# A slice of the whole list builds a new outer list.
b = a[:]
print(f"a is b: {a is b}")

# Shallow copies
# - The outer list is new, but a nested list is still shared.
a = [1, 2, [3, 4], 5]
b = a[:]
print(f"a is b: {a is b}")

# Changing an element of the nested list shows up in both, so the shallow
# copy gave no protection here.
a[2][0] = 99
print(f"a = {a}")
print(f"b = {b}")

# A deep copy duplicates the nested list as well.
import copy

a = [1, 2, [3, 4], 5]
b = copy.deepcopy(a)
print(f"a is b: {a is b}")
a[2][0] = 99
print(f"a = {a}")
print(f"b = {b}")

# 결과
a is b: True
a is b: False
a is b: False
a = [1, 2, [99, 4], 5]
b = [1, 2, [99, 4], 5]
a is b: False
a = [1, 2, [99, 4], 5]
b = [1, 2, [3, 4], 5]

import math

# Two constants, a greatest common divisor, the distance between two points
# and a natural logarithm, printed one per line.
for value in (
    math.pi,
    math.e,
    math.gcd(60, 76),
    math.dist([5, 8], [3, 1]),
    math.log(28),
):
    print(value)

# 결과
3.141592653589793
2.718281828459045
4
7.280109889280518
3.332204510175204

import statistics as st

stats = [98, 77, 88, 65, 90, 54, 52, 52]

# Apply each summary function to the same data and print one result per line.
for summarize in (st.mean, st.median, st.mode, st.variance, st.stdev):
    print(summarize(stats))

# 결과
72
71.0
52
350.57142857142856
18.723552776421162

import sys

# Interpreter version string, then the path of the running Python executable.
for info in (sys.version, sys.executable):
    print(info)

# 결과
3.13.0 (tags/v3.13.0:60403a5, Oct  7 2024, 09:38:07) [MSC v.1941 64 bit (AMD64)]
C:\Users\...\Python\python.exe