Untitled

透過Extract, Transform, Load可以從不同的來源提取檔案

Untitled

資料清洗: https://datama.com.tw/20201216b1/

Untitled

Untitled

Untitled

import csv

with open("file.csv", newline="", encoding="cp950") as csv_file:
    reader = csv.reader(csv_file)
    # Printing the reader itself shows only the object's repr, not the rows,
    # e.g. <_csv.reader object at 0x000001E9A0A54760>
    print(reader)

    # Iterating over the reader yields each CSV row as a list of strings.
    for row in reader:
        print(row)

#['id', 'name', 'year of birth']
#['1', 'albert einstein', '1879']
#['2', 'isaac newton', '1643']
#['3', 'marie curie', '1867']
#['4', 'galil��e', '1564']

		print(row[1].title())
#title() 會把每個單字的第一個字母轉成大寫

#Name
#Albert Einstein
#Isaac Newton
#Marie Curie
#Galil��E

如果沒用 newline 參數會發生什麼事: https://discuss.codecademy.com/t/what-does-the-newline-argument-do/463575/28

Untitled

記得使用 newline=""

import csv

with open("new.csv", mode="w", newline="", encoding="cp950") as out_file:
    # Opening in "w" mode creates a new new.csv file.
    writer = csv.writer(out_file, delimiter=",")
    header = ['a', 'b', 'c']
    writer.writerow(header)
import csv

with open("new.csv", mode="a", newline="", encoding="cp950") as f:
    # Append mode ("a") keeps the rows already in new.csv; "w" would
    # truncate the file and discard them.
    # delimiter is a keyword argument that sets the field separator.
    csv_writer = csv.writer(f, delimiter=",")

    # writerows() takes a single iterable of rows, so both rows are wrapped
    # in one outer list; passing them as two arguments raises TypeError.
    csv_writer.writerows([['e', 'f', 'g'], ['h', 'i', 'j']])

code sample: https://www.programiz.com/python-programming/writing-csv-files

Untitled

from openpyxl import load_workbook
wb = load_workbook("Dodgers.xlsx")

# Use the first worksheet in the workbook.
ws = wb.worksheets[0]

# Collect every row, starting from the first, as a list of its cell values.
result = [[cell.value for cell in row] for row in ws.iter_rows()]

print(result)

#[['Rk', 'Pos', 'Name', 'Age', 'G', 'PA', 'AB', 'R', 'H', '2B', '3B', 'HR', 'RBI', 'SB', 'CS', 'BB', 'SO', 'BA', 'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP', 'SH', 'SF', 'IBB'], [1, 'C', 'Will Smith', 26, 130, 501, 414, 71, 107, 19, 2, 25, 76, 3, 0, 58, 101, 0.258, 0.365, 0.495, 0.86, 130, 205, 11, 18, 0, 11, 4], [2, '1B', 'Max Muncy', 30, 144, 592, 497, 95, 124, 26, 2, 36, 94, 2, 1, 83, 120, 0.249, 0.368, 0.527, 0.895, 138, 262, 7, 11, 0, 1, 5], [3, '2B', 'Trea Turner', 28, 52, 226, 207, 41, 70, 17, 0, 10, 28, 11, 2, 15, 33, 0.338, 0.385, 0.565, 0.95, 152, 117, 5, 2, 0, 2, 2], [4, 'SS', 'Corey Seager', 27, 95, 409, 353, 54, 108, 22, 3, 16, 57, 1, 1, 48, 66, 0.306, 0.394, 0.521, 0.915, 145, 184, 8, 5, 0, 3, 2], [5, '3B', 'Justin Turner', 36, 151, 612, 533, 87, 148, 22, 0, 27, 87, 3, 0, 61, 98, 0.278, 0.361, 0.471, 0.832, 123, 251, 12, 12, 0, 6, 0], [6, 'LF', 'AJ Pollock', 33, 117, 422, 384, 53, 114, 27, 1, 21, 69, 9, 1, 30, 80, 0.297, 0.355, 0.536, 0.892, 137, 206, 4, 6, 0, 2, 4], [7, 'CF', 'Cody Bellinger', 25, 95, 350, 315, 39, 52, 9, 2, 10, 36, 3, 1, 31, 94, 0.165, 0.24, 0.302, 0.542, 45, 95, 2, 1, 0, 3, 2], [8, 'RF', 'Mookie Betts', 28, 122, 550, 466, 93, 123, 29, 3, 23, 58, 10, 5, 68, 86, 0.264, 0.367, 0.487, 0.854, 128, 227, 5, 11, 0, 5, 2]]