diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..757fee31 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/.idea \ No newline at end of file diff --git a/04/dicset.py b/04/dicset.py index 1bc3c366..68c59b93 100644 --- a/04/dicset.py +++ b/04/dicset.py @@ -4,133 +4,132 @@ # 根据商品ID求价格,用列表 def find_product_price(products, product_id): - for id, price in products: - if id == product_id: - return price - return None - + for id, price in products: + if id == product_id: + return price + return None + + products = [ - (1, 100), - (2, 400), - (3, 50), - (4, 400) + (1, 100), + (2, 400), + (3, 50), + (4, 400) ] # 更改需求,找出有多少种不同的价格 def find_unique_price(products): - unique_price_list = [] - for _, price in products: - if price not in unique_price_list: - unique_price_list.append(price) - return len(unique_price_list) - - + unique_price_list = [] + for _, price in products: + if price not in unique_price_list: + unique_price_list.append(price) + return len(unique_price_list) + + # 字典版 def find_unique_price_set(products): - unique_price_set = set() - for _, price in products: - unique_price_set.add(price) - return len(unique_price_set) + unique_price_set = set() + for _, price in products: + unique_price_set.add(price) + return len(unique_price_set) if __name__ == "__main__": # 初始化字典和集合 - d1 = {"name":"jason", "age":20, "gender":"male"} - d2 = dict({"name":"jason", "age":20, "gender":"male"}) + d1 = {"name": "jason", "age": 20, "gender": "male"} + d2 = dict({"name": "jason", "age": 20, "gender": "male"}) d3 = dict([('name', 'jason'), ('age', 20), ('gender', 'male')]) d4 = dict(name='jason', age=20, gender='male') print(d1 == d2 == d3 == d4) - + s1 = {1, 2, 3} s2 = set([1, 2, 3]) print(s1 == s2) - + # 混合类型 s = {1, "hello", 5.0} print(s) - + # 元素访问 - d = {"name":"zym", "age":20} + d = {"name": "zym", "age": 20} print(d["name"]) print(d.get("age")) print(d.get("locate", "null")) - + s = {1, 2, 3} # print(s[1]) #本行出错 - + # 判断元素是否在字典/集合内 s = {1, 2, 3} print(1 in s) print(10 in s) - d = {"name":"zym", "age":20} + d = {"name": "zym", "age": 20} print("name" in d) print("location" in d) - + # 增删查改函数 - d = {"name":"zym", "age":20} - d["gender"] = "male" #增加元素 + d = {"name": "zym", "age": 20} + d["gender"] = "male" # 增加元素 d["dob"] = "1999-02-01" print(d) - d["dob"] = "1998-01-01" #更新键值 + d["dob"] = "1998-01-01" # 更新键值 print(d) - d.pop("dob") #删除键值 + d.pop("dob") # 删除键值 print(d) - + s = {1, 2, 3} - s.add(4) #增加元素 + s.add(4) # 增加元素 print(s) - s.remove(4) #删除元素 + s.remove(4) # 删除元素 print(s) - + # 字典排序 - d = {'b':1, 'a':2, 'c':10} + d = {'b': 1, 'a': 2, 'c': 10} # 根据字典键的升序排序 - d_sorted_by_key = sorted(d.items(), key = lambda x:x[0]) + d_sorted_by_key = sorted(d.items(), key=lambda x: x[0]) print(d_sorted_by_key) # 根据字典值的升序排序 - d_sorted_by_value = sorted(d.items(), key = lambda x:x[1]) + d_sorted_by_value = sorted(d.items(), key=lambda x: x[1]) print(d_sorted_by_value) - + # 对集合排序 - s = [3, 4, 2, 1] + s = {3, 4, 2, 1} s_sorted = sorted(s) print(s_sorted) - + # 根据商品ID找商品价格 print("id为2的商品价格为{}".format(find_product_price(products, 2))) - + # 用字典来存储 products_set = { - 1:100, - 2:400, - 3:50, - 4:400} + 1: 100, + 2: 400, + 3: 50, + 4: 400} print("id为3的商品价格为{}".format(products_set[3])) - + # 看商品里有多少种不同的价格? # 列表版, O(N2) - print("不同价格的数目为{}".format( find_unique_price(products))) + print("不同价格的数目为{}".format(find_unique_price(products))) # 字典版,O(N) - print("不同价格的数目为{}".format( find_unique_price_set(products))) - + print("不同价格的数目为{}".format(find_unique_price_set(products))) + # 计算效率 import time - + id = [x for x in range(0, 10000)] price = [x for x in range(20000, 30000)] products = list(zip(id, price)) - + # 计算列表版本的时间 start_using_list = time.perf_counter() find_unique_price(products) end_using_list = time.perf_counter() print("使用列表耗时:{}".format(end_using_list - start_using_list)) - + # 计算字典版本的时间 start_using_set = time.perf_counter() find_unique_price_set(products) end_using_set = time.perf_counter() print("使用列表耗时:{}".format(end_using_set - start_using_set)) - - \ No newline at end of file diff --git a/05/string.py b/05/string.py index 7f35653d..b8154515 100644 --- a/05/string.py +++ b/05/string.py @@ -1,54 +1,60 @@ -#-*- coding:utf8-*- +# -*- coding:utf8-*- # 基础篇:05深入浅出字符串 - +import time if __name__ == "__main__": - name = 'aaa' - city = "bbb" - text = """ccc""" - print(name, city, text) - - # 转义符 - s = "a\nb\tc" - print(s) - print(len(s)) - - # 索引切片遍历 - name = "jason" - print(name[0]) - print(name[1:3]) - for char in name: - print(char) - - # 改变字符串 - s = "hello" - s = 'H' + s[1:] - print(s) - s = s.replace('H', 'h') - print(s) - - # 字符串拼接,时间复杂度O(N) - s = '' - for n in range(0, 100000): - s += str(n) - print(s) - - # join函数 时间复杂度O(N) - l = [] - for n in range(0, 100000): - l.append(str(n)) - l = ' '.join(l) - print(l) - - # split分割数据 - path = "hive://ads/training_table" - namespace = path.split('//')[1].split('/')[0] - table = path.split('//')[1].split('/')[1] - print(namespace, table) - - # strip函数 - s = " my name is jason " - print(s.strip()) - - # 字符串格式化函数 - print("我的名字叫{},年龄{}".format("zym", str(35))) \ No newline at end of file + name = 'aaa' + city = "bbb" + text = """ccc""" + print(name, city, text) + + # 转义符 + s = "a\nb\tc" + print(s) + print(len(s)) + + # 索引切片遍历 + name = "jason" + print(name[0]) + print(name[1:3]) + for char in name: + print(char) + + # 改变字符串 + s = "hello" + s = 'H' + s[1:] + print(s) + s = s.replace('H', 'h') + print(s) + + # 字符串拼接,时间复杂度O(N) + start_using_list = time.perf_counter() + s = '' + for n in range(0, 1000): + s += str(n) + # print(s) + end_using_list = time.perf_counter() + print("使用字符串拼接耗时:{}".format(end_using_list - start_using_list)) + + # join函数 时间复杂度O(N) + start_using_list = time.perf_counter() + l = [] + for n in range(0, 1000): + l.append(str(n)) + l = ' '.join(l) + # print(l) + end_using_list = time.perf_counter() + print("使用join函数耗时:{}".format(end_using_list - start_using_list)) + + # split分割数据 + path = "hive://ads/training_table" + namespace = path.split('//')[1].split('/')[0] + table = path.split('//')[1].split('/')[1] + print(namespace, table) + + # strip函数 + s = " my name is jason " + print(s.strip()) + + # 字符串格式化函数 + print("我的名字叫{},年龄{}".format("zym", str(35))) \ No newline at end of file diff --git a/06/inout.py b/06/inout.py index 4e029bba..e245a9e3 100644 --- a/06/inout.py +++ b/06/inout.py @@ -1,6 +1,5 @@ -#coding:utf-8 -#第6课:输入与输出 - +# coding:utf-8 +# 第6课:输入与输出 import re import json @@ -8,105 +7,105 @@ # 处理文本 def parse(text): - # 去除标点符号和换行 - text = re.sub(r'[^\w ]', ' ', text) - # 转为小写 - text = text.lower() - # 单词列表 - word_list = text.split(' ') - # 去除空白单词 - word_list = filter(None, word_list) - # 生成单词和词频的字典 - word_cnt = {} - for word in word_list: - if word not in word_cnt: - word_cnt[word] = 0 - word_cnt[word] += 1 - - # 按照词频排序 - sorted_word_cnt = sorted(word_cnt.items(), key = lambda kv: kv[1], reverse = True) - return sorted_word_cnt - - + # 去除标点符号和换行 + text = re.sub(r'[^\w ]', ' ', text) + # 转为小写 + text = text.lower() + # 单词列表 + word_list = text.split(' ') + # 去除空白单词 + word_list = filter(None, word_list) + # 生成单词和词频的字典 + word_cnt = {} + for word in word_list: + if word not in word_cnt: + word_cnt[word] = 0 + word_cnt[word] += 1 + + # 按照词频排序 + sorted_word_cnt = sorted(word_cnt.items(), key=lambda kv: kv[1], reverse=True) + return sorted_word_cnt + + # readline版本的parse,练习1 # 处理文本 def parse_readline(infile): - # 生成单词和词频的字典 - word_cnt = {} - while True: - text = infile.readline() - if not text: - break - print(text) - # 去除标点符号和换行 - text = re.sub(r'[^\w ]', ' ', text) - # 转为小写 - text = text.lower() - # 单词列表 - word_list = text.split(' ') - # 去除空白单词 - word_list = filter(None, word_list) - - for word in word_list: - if word not in word_cnt: - word_cnt[word] = 0 - word_cnt[word] += 1 - - # 按照词频排序 - sorted_word_cnt = sorted(word_cnt.items(), key = lambda kv: kv[1], reverse = True) - return sorted_word_cnt - + # 生成单词和词频的字典 + word_cnt = {} + while True: + text = infile.readline() + if not text: + break + print(text) + # 去除标点符号和换行 + text = re.sub(r'[^\w ]', ' ', text) + # 转为小写 + text = text.lower() + # 单词列表 + word_list = text.split(' ') + # 去除空白单词 + word_list = filter(None, word_list) + + for word in word_list: + if word not in word_cnt: + word_cnt[word] = 0 + word_cnt[word] += 1 + + # 按照词频排序 + sorted_word_cnt = sorted(word_cnt.items(), key=lambda kv: kv[1], reverse=True) + return sorted_word_cnt + if __name__ == "__main__": - """ - # 输入 - name = input("姓名:") - gender = input("男的?(y/n)") - - welcome_str = "欢迎来到矩阵空间{prefix}{name}." - welcome_dic = { - "prefix":"Mr." if gender == 'y' else "Mrs.", - "name":name - } - print(welcome_str.format(**welcome_dic)) - - # 输入类型转换 - a = input("输入a:") - b = input("输入b:") - print("a + b ={}".format(a+b)) - print("a的类型为{},b的类型为{}".format(type(a), type(b))) - print("a + b ={}".format(int(a) + int(b))) - """ - # 文件输入输出 - with open("in.txt", "r") as fin: - text = fin.read() - - word_and_freq = parse(text) - - with open("out.txt", "w") as fout: - for word, freq in word_and_freq: - fout.write('{} {}\n'.format(word, freq)) - - # 使用JSON - params = { - "symbol" : "123456", - "type" : "limit", - "price" : 123.4, - "amount" : 23 - } - params_str = json.dumps(params) - print("序列化以后") - print("类型{},值{}".format(type(params_str), params_str)) - - original_params = json.loads(params_str) - print("在去序列化之后") - print("类型{},值{}".format(type(original_params), original_params)) - - # 思考题1 - with open("in.txt", "r") as fin: - word_and_freq = parse_readline(fin) - - with open("out_readline.txt", "w") as fout: - for word, freq in word_and_freq: - fout.write('{} {}\n'.format(word, freq)) - \ No newline at end of file + """ + # 输入 + name = input("姓名:") + gender = input("男的?(y/n)") + + welcome_str = "欢迎来到矩阵空间{prefix}{name}." + welcome_dic = { + "prefix": "Mr." if gender == 'y' else "Mrs.", + "name": name + } + print(welcome_str.format(**welcome_dic)) + + # 输入类型转换 + a = input("输入a:") + b = input("输入b:") + print("a + b ={}".format(a + b)) + print("a的类型为{},b的类型为{}".format(type(a), type(b))) + print("a + b ={}".format(int(a) + int(b))) + """ + + # 文件输入输出 + with open("in.txt", "r") as fin: + text = fin.read() + + word_and_freq = parse(text) + + with open("out.txt", "w") as fout: + for word, freq in word_and_freq: + fout.write('{} {}\n'.format(word, freq)) + + # 使用JSON + params = { + "symbol": "123456", + "type": "limit", + "price": 123.4, + "amount": 23 + } + params_str = json.dumps(params) + print("序列化以后") + print("类型{},值{}".format(type(params_str), params_str)) + + original_params = json.loads(params_str) + print("在去序列化之后") + print("类型{},值{}".format(type(original_params), original_params)) + + # 思考题1 + with open("in.txt", "r") as fin: + word_and_freq = parse_readline(fin) + + with open("out_readline.txt", "w") as fout: + for word, freq in word_and_freq: + fout.write('{} {}\n'.format(word, freq)) diff --git a/07/ifloop.py b/07/ifloop.py index 4d06c4e8..0e2ce429 100644 --- a/07/ifloop.py +++ b/07/ifloop.py @@ -3,121 +3,123 @@ if __name__ == "__main__": - # 条件语句 - x = -3 - if x < 0: - y = -x - else: - y = x - print(y) - - # elif语句 - id = 2 - if id == 0: - print("red") - elif id == 1: - print("yellow") - else: - print("green") - - # 循环 - l = [1, 2, 3, 4] - for item in l: - print(item) - - # 字典循环 - d = { - "name":"jason", - "dob":"2000-01-01", - "gender":"male" - } - for k in d: - print(k) - - for v in d.values(): - print(v) - - for k, v in d.items(): - print("keys:{}, values:{}".format(k, v)) - - # 用索引来循环 - l = [1,2,3,4,5,6,7] - for index in range(0, len(l)): - if index < 5: - print(l[index]) - - # 用索引和元素来循环 - l = [1,2,3,4,5,6,7] - for index, item in enumerate(l): - if index < 5: - print(item) - - # break和continue - name_price = {"一":100, - "二":10, - "三":10000} - name_color = {"一":"红", - "二":"蓝", - "三":"红"} - # 不用continue - for name, price in name_price.items(): - if price < 1000: - if name in name_color: - for color in name_color[name]: - if color != "红": - print("name:{}, color:{}".format(name, color)) - else: - print("name:{}, color:{}".format(name, None)) - # 用continue - for name, price in name_price.items(): - if price>= 1000: - continue - if name not in name_color: - print("name:{}, color:{}".format(name, None)) - continue - for color in name_color[name]: - if color == "red": - continue - print("name:{}, color:{}".format(name, color)) - - # while循环 - l = [1,2,3,4] - index = 0 - while index < len(l): - print(l[index]) - index += 1 - - # 测试for和while的效率 - import time - start_for = time.perf_counter() - for i in range(0, 1000000): - pass - end_for = time.perf_counter() - print("for循环{}秒".format(end_for-start_for)) - start_while = time.perf_counter() - i = 0 - while i < 1000000: - i += 1 - end_while = time.perf_counter() - print("while循环{}秒".format(end_while-start_while)) - - # 思考题 - attributes = ['name', 'dob', 'gender'] - values = [ - ['jason', '2000-01-01', 'male'], - ['mike', '1999-01-01', 'male'], - ['nancy', '2001-02-01', 'female'] - ] - # 多行循环语句 - result = [] - for index in range(0, len(values)): - temp = {} - for j in range(3): - temp[attributes[j]]=values[index][j] - result.append(temp) - print(result) - # 一行条件循环语句 抄同学的 - result = [dict(zip(attributes,v)) for v in values] - print(result) + # 条件语句 + x = -3 + if x < 0: + y = -x + else: + y = x + print(y) + + # elif语句 + id = 2 + if id == 0: + print("red") + elif id == 1: + print("yellow") + else: + print("green") + + # 循环 + l = [1, 2, 3, 4] + for item in l: + print(item) + + # 字典循环 + d = { + "name": "jason", + "dob": "2000-01-01", + "gender": "male" + } + for k in d: + print(k) + + for v in d.values(): + print(v) + + for k, v in d.items(): + print("keys:{}, values:{}".format(k, v)) + + # 用索引来循环 + l = [1, 2, 3, 4, 5, 6, 7] + for index in range(0, len(l)): + if index < 5: + print(l[index]) + + # 用索引和元素来循环 + l = [1, 2, 3, 4, 5, 6, 7] + for index, item in enumerate(l): + if index < 5: + print(item) + + # break和continue + name_price = {"一": 100, + "二": 10, + "三": 10000} + name_color = {"一": ["红"], + "二": ["蓝"], + "三": ["红"]} + # 不用continue + for name, price in name_price.items(): + if price < 1000: + if name in name_color: + for color in name_color[name]: + if color != "红": + print("name:{}, color:{}".format(name, color)) + else: + print("name:{}, color:{}".format(name, None)) + # 用continue + for name, price in name_price.items(): + if price>= 1000: + continue + if name not in name_color: + print("name:{}, color:{}".format(name, None)) + continue + for color in name_color[name]: + if color == "红": + continue + print("name:{}, color:{}".format(name, color)) + + # while循环 + l = [1, 2, 3, 4] + index = 0 + while index < len(l): + print(l[index]) + index += 1 + + # 测试for和while的效率 + import time + + start_for = time.perf_counter() + for i in range(0, 1000000): + pass + end_for = time.perf_counter() + print("for循环{}秒".format(end_for - start_for)) + start_while = time.perf_counter() + i = 0 + while i < 1000000: + i += 1 + end_while = time.perf_counter() + print("while循环{}秒".format(end_while - start_while)) + print("同等循环次数,for循环是while循环执行效率的{}倍".format((end_while - start_while) / (end_for - start_for))) + + # 思考题 + attributes = ['name', 'dob', 'gender'] + values = [ + ['jason', '2000-01-01', 'male'], + ['mike', '1999-01-01', 'male'], + ['nancy', '2001-02-01', 'female'] + ] + # 多行循环语句 + result = [] + for index in range(0, len(values)): + temp = {} + for j in range(0, len(attributes)): + temp[attributes[j]] = values[index][j] + result.append(temp) + print(result) + # 一行条件循环语句 抄同学的 + result = [dict(zip(attributes, v)) for v in values] + print(result) \ No newline at end of file diff --git a/08/except.py b/08/except.py index 4486c99f..2c57e4c8 100644 --- a/08/except.py +++ b/08/except.py @@ -4,31 +4,30 @@ # 自定义异常类 class MyInputError(Exception): - def __init__(self, value): - self.value = value - - def __str__(self): - return("{} is invalid input".format(repr(self.value))) + def __init__(self, value): + self.value = value + + def __str__(self): + return "{} is invalid input".format(repr(self.value)) if __name__ == "__main__": - # try except语句 - try: - s = input("输入数字,以,分隔:") - num1 = int(s.split(",")[0].strip()) - num2 = int(s.split(",")[1].strip()) - - except ValueError as err: - print("值错误:{}".format(err)) - except Exception as err: - print("其它异常:{}".format(err)) - - print("继续") - - # 自定义异常 - try: - raise MyInputError(1) - except MyInputError as err: - print("error:{}".format(err)) - print("继续2") - \ No newline at end of file + # try except语句 + try: + s = input("输入数字,以,分隔:") + num1 = int(s.split(",")[0].strip()) + num2 = int(s.split(",")[1].strip()) + + except ValueError as err: + print("值错误:{}".format(err)) + except Exception as err: + print("其它异常:{}".format(err)) + + print("继续") + + # 自定义异常 + try: + raise MyInputError(1) + except MyInputError as err: + print("error:{}".format(err)) + print("继续2") diff --git a/09/fun.py b/09/fun.py index 5ccfad6f..ae75ad96 100644 --- a/09/fun.py +++ b/09/fun.py @@ -3,114 +3,138 @@ # 调用另一个函数 def func(message): - my_func(message) + my_func(message) -def my_func(message): - print("收到一个消息:{}".format(message)) +def my_func(message): + print("收到一个消息:{}".format(message)) if __name__ == "__main__": - my_func("hello world!") - - # 函数嵌套 - def my_sum(a, b): - return a+b - - result = my_sum(3, 5) - print(result) - - def find_largest_element(l): - if not isinstance(l, list): - print("输入数据不是列表") - return - if len(l) == 0: - print("列表为空") - return - largest_element = l[0] - for item in l: - if item> largest_element: - largest_element = item - print("列表中最大元素为:{}".format(largest_element)) - - find_largest_element([3, -5, 6, 8, 2, 1]) - - func("你好,python") - - # 参数的多态性 - print(my_sum([1, 2], [3, 4])) - print("hell", " world") - try: - my_sum(5, "7") - except Exception as err: - print("发生错误!{}".format(err)) - - # 函数嵌套提高效率 - def factorial(input): - # 输入检查,只运行一次 - if not isinstance(input, int): - raise Exception("必须输入整数") - if input < 0: - raise Exception("输入必须大于等于0") - - # 实际计算 - def inner_factorial(input): - if input <= 1: - return 1 - return input*inner_factorial(input-1) - - return(inner_factorial(input)) - - try: - print(factorial(12)) - except Exception as err: - print(err) - - # 函数中改变外部变量 - value = 2 - overvalue = 3 - def changeValue(): - global value - value += 1 - overvalue = 6 - print(value, overvalue) - changeValue() - print(value) - - # 嵌套函数内部修改 - # 加nonlocal - print("加nonlocal") - def outer(): - x = 3 - def inner(): - nonlocal x - x = 5 - print("内部", x) - print("外部", x) - inner() - print("外部", x) - outer() - # 不加 - print("不加") - def outer2(): - x = 3 - def inner2(): - x = 5 - print("内部", x) - print("外部", x) - inner2() - print("外部", x) - outer2() - - # 闭包,计算n次幂 - def nth_power(exp): - def exponent_of(base): - return base**exp - return exponent_of - - square = nth_power(2) - cube = nth_power(3) - - print(square(2)) - print(cube(2)) - \ No newline at end of file + my_func("hello world!") + + + # 函数嵌套 + def my_sum(a, b): + return a + b + + + result = my_sum(3, 5) + print(result) + + + def find_largest_element(l): + if not isinstance(l, list): + print("输入数据不是列表") + return + if len(l) == 0: + print("列表为空") + return + largest_element = l[0] + for item in l: + if item> largest_element: + largest_element = item + print("列表中最大元素为:{}".format(largest_element)) + + + find_largest_element([3, -5, 6, 8, 2, 1]) + + func("你好,python") + + # 参数的多态性 + print(my_sum([1, 2], [3, 4])) + print("hell", " world") + try: + my_sum(5, "7") + except Exception as err: + print("发生错误!{}".format(err)) + + + # 函数嵌套提高效率 + def factorial(input): + # 输入检查,只运行一次 + if not isinstance(input, int): + raise Exception("必须输入整数") + if input < 0: + raise Exception("输入必须大于等于0") + + # 实际计算 + def inner_factorial(input): + if input <= 1: + return 1 + return input * inner_factorial(input - 1) + + return (inner_factorial(input)) + + + try: + print(factorial(12)) + except Exception as err: + print(err) + + # 函数中改变外部变量 + value = 2 + overvalue = 3 + + + def changeValue(): + global value + value += 1 + overvalue = 6 + print(value, overvalue) + + + changeValue() + print(value) + + # 嵌套函数内部修改 + # 加nonlocal + print("加nonlocal") + + + def outer(): + x = 3 + + def inner(): + nonlocal x + x = 5 + print("内部", x) + + print("外部", x) + inner() + print("外部", x) + + + outer() + # 不加 + print("不加") + + + def outer2(): + x = 3 + + def inner2(): + x = 5 + print("内部", x) + + print("外部", x) + inner2() + print("外部", x) + + + outer2() + + + # 闭包,计算n次幂 + def nth_power(exp): + def exponent_of(base): + return base ** exp + + return exponent_of + + + square = nth_power(2) + cube = nth_power(3) + + print(square(2)) + print(cube(2)) diff --git a/10/button.py b/10/button.py index 08aeaca0..5294e45f 100644 --- a/10/button.py +++ b/10/button.py @@ -4,8 +4,8 @@ from tkinter import Button, mainloop button = Button( - text = "This is a button", - command = lambda : print("being pressed") + , + command=lambda: print("being pressed") ) button.pack() mainloop() \ No newline at end of file diff --git a/10/nmfun.py b/10/nmfun.py index b8cc2141..1ac64797 100644 --- a/10/nmfun.py +++ b/10/nmfun.py @@ -3,52 +3,57 @@ if __name__ == "__main__": - # lambda表达式 - square = lambda x:x**2 - print(square(3)) - - # 列表内部使用 - l = [(lambda x:x**2)(x) for x in range(10)] - print(l) - - # 用作函数参数 - l = [(1, 20), (3, 0), (9, 10), (2, -1)] - l.sort(key = lambda x:x[1]) - print(l) - - # 让程序简洁 - squares = map(lambda x:x**2, [1,2,3,4,5]) - print(list(squares)) - - # 函数式编程,将列表元素加倍 - def mutiply_2_pure(l): - new_list = [] - for item in l: - new_list.append(item*2) - return new_list - - print(mutiply_2_pure([1,2,3,4])) - - # map函数 - l = [1,3,5,6,8] - new_list = list(map(lambda x:x**2, l)) - print(new_list) - - # filter函数,返回列表中所有偶数 - l = [1,2,3,4,5,6,7,8,9] - new_list = filter(lambda x:x%2 == 0, l) - print(list(new_list)) - - # reduce函数 计算阶乘 - from functools import reduce - product = reduce(lambda x, y:x*y, l) - print(product) - - # 思考题 - # 1 将字典按值从大到小排序 - import operator - d = {"mike":10, "lucy":2, "ben":30} - print(d.items()) - sort_d = sorted(d.items(), key=operator.itemgetter(1), reverse=True) - print(sort_d) - + # lambda表达式 + square = lambda x: x ** 2 + print(square(3)) + + # 列表内部使用 + l = [(lambda x: x ** 2)(x) for x in range(10)] + print(l) + + # 用作函数参数 + l = [(1, 20), (3, 0), (9, 10), (2, -1)] + l.sort(key=lambda x: x[1]) + print(l) + + # 让程序简洁 + squares = map(lambda x: x ** 2, [1, 2, 3, 4, 5]) + print(list(squares)) + + + # 函数式编程,将列表元素加倍 + def mutiply_2_pure(l): + new_list = [] + for item in l: + new_list.append(item * 2) + return new_list + + + print(mutiply_2_pure([1, 2, 3, 4])) + + # map函数 + l = [1, 3, 5, 6, 8] + new_list = list(map(lambda x: x ** 2, l)) + print(new_list) + + # filter函数,返回列表中所有偶数 + l = [1, 2, 3, 4, 5, 6, 7, 8, 9] + new_list = filter(lambda x: x % 2 == 0, l) + print(list(new_list)) + + # reduce函数 计算阶乘 + from functools import reduce + + product = reduce(lambda x, y: x * y, l) + print(product) + + # 思考题 + # 1 将字典按值从大到小排序 + import operator + + d = {"mike": 10, "lucy": 2, "ben": 30} + print(d.items()) + sort_d = sorted(d.items(), key=operator.itemgetter(1), reverse=True) + print(sort_d) + + print(sorted(d.items(), key=lambda x: x[1], reverse=True)) diff --git a/11/class.py b/11/class.py index 922d1518..4c7c2cac 100644 --- a/11/class.py +++ b/11/class.py @@ -4,150 +4,157 @@ # 类 class Document(): - def __init__(self, title, author, context): - print("调用初始函数!") - self.title = title - self.author = author - self.__context = context #私有属性 - - def get_context_length(self): - return len(self.__context) - - def intercept_context(self, length): - self.__context = self.__context[:length] - - + def __init__(self, title, author, context): + print("调用初始函数!") + self.title = title + self.author = author + self.__context = context # 私有属性 + + def get_context_length(self): + return len(self.__context) + + def intercept_context(self, length): + self.__context = self.__context[:length] + + # 类2 class Document2(): - WELCOME_STR = "欢迎,本书的内容为{}." - - def __init__(self, title, author, context): - print("调用初始函数!") - self.title = title - self.author = author - self.__context = context #私有属性 - - # 类函数 - @classmethod - def create_empty_book(cls, title, author): - return cls(title=title, author=author, con) - - # 成员函数 - def get_context_length(self): - return len(self.__context) - - # 静态函数 - @staticmethod - def get_welcome(context): - return Document2.WELCOME_STR.format(context) - + WELCOME_STR = "欢迎,本书的内容为{}." + + def __init__(self, title, author, context): + print("调用初始函数!") + self.title = title + self.author = author + self.__context = context # 私有属性 + + # 类函数 + @classmethod + def create_empty_book(cls, title, author): + return cls(title=title, author=author, con) + + # 成员函数 + def get_context_length(self): + return len(self.__context) + + # 静态函数 + @staticmethod + def get_welcome(context): + return Document2.WELCOME_STR.format(context) + + # 类的继承 class Entity(): - def __init__(self, object_type): - print("父类构造函数") - self.object_type = object_type - - def get_contex_length(self): - raise Exception("没有定义get_context_length") - - def print_title(self): - print(self.title) - - + def __init__(self, object_type): + print("父类构造函数") + self.object_type = object_type + + def get_contex_length(self): + raise Exception("没有定义get_context_length") + + def print_title(self): + print(self.title) + + class Document3(Entity): - def __init__(self, title, author, context): - Entity.__init__(self, "document") - print("Document3调用初始函数!") - self.title = title - self.author = author - self.__context = context - - def get_context_length(self): - return len(self.__context) - + def __init__(self, title, author, context): + Entity.__init__(self, "document") + print("Document3调用初始函数!") + self.title = title + self.author = author + self.__context = context + + def get_context_length(self): + return len(self.__context) + + class Video(Entity): - def __init__(self, title, author, video_length): - Entity.__init__(self, "video") - print("video调用初始函数!") - self.title = title - self.author = author - self.__video_length = video_length - - def get_context_length(self): - return self.__video_length - + def __init__(self, title, author, video_length): + Entity.__init__(self, "video") + print("video调用初始函数!") + self.title = title + self.author = author + self.__video_length = video_length + + def get_context_length(self): + return self.__video_length + # 抽象函数和抽象类 from abc import ABCMeta, abstractmethod -class Entity2(metaclass = ABCMeta): - @abstractmethod - def get_title(self): - pass - - @abstractmethod - def set_title(self, title): - pass - + + +class Entity2(metaclass=ABCMeta): + @abstractmethod + def get_title(self): + pass + + @abstractmethod + def set_title(self, title): + pass + class Document4(Entity2): - def get_title(self): - return self.title - - def set_title(self, title): - self.title = title - - + def get_title(self): + return self.title + + def set_title(self, title): + self.title = title + + # 思考题 class A(): - def __init__(self): - print("A") - + def __init__(self): + print("A") + + class B(A): - def __init__(self): - A.__init__(self) - print("B") - + def __init__(self): + A.__init__(self) + print("B") + + class C(A): - def __init__(self): - A.__init__(self) - print("C") - + def __init__(self): + A.__init__(self) + print("C") + + class D(B, C): - def __init__(self): - B.__init__(self) - C.__init__(self) - print("D") + def __init__(self): + B.__init__(self) + C.__init__(self) + print("D") + if __name__ == "__main__": - harry_potter_book = Document("hp", "J.K.Rowling", "aabbccgfdghhddee") - - print(harry_potter_book.title) - print(harry_potter_book.author) - print(harry_potter_book.get_context_length()) - harry_potter_book.intercept_context(10) - print(harry_potter_book.get_context_length()) - # print(harry_potter_book.__context) - - empty_book = Document2.create_empty_book("aaaaa", "bbbbb") - print(empty_book.get_context_length()) - print(empty_book.get_welcome("indeed nothing")) - - # 类继承 - hp_book = Document3("a", "aa", "aaa") - hp_movie = Video("b", "bb", 30) - - print(hp_book.object_type) - print(hp_movie.object_type) - - print(hp_book.get_context_length()) - print(hp_movie.get_context_length()) - - # 抽象类 - document = Document4() - document.set_title("hp") - print(document.get_title()) - - # entity = Entity2() - # 思考题 - d = D() - \ No newline at end of file + harry_potter_book = Document("hp", "J.K.Rowling", "aabbccgfdghhddee") + + print(harry_potter_book.title) + print(harry_potter_book.author) + print(harry_potter_book.get_context_length()) + harry_potter_book.intercept_context(10) + print(harry_potter_book.get_context_length()) + # print(harry_potter_book.__context) + + empty_book = Document2.create_empty_book("aaaaa", "bbbbb") + print(empty_book.get_context_length()) + print(empty_book.get_welcome("indeed nothing")) + + # 类继承 + hp_book = Document3("a", "aa", "aaa") + hp_movie = Video("b", "bb", 30) + + print(hp_book.object_type) + print(hp_movie.object_type) + + print(hp_book.get_context_length()) + print(hp_movie.get_context_length()) + + # 抽象类 + document = Document4() + document.set_title("hp") + print(document.get_title()) + + # entity = Entity2() + # 思考题 + d = D() diff --git a/12/5.txt b/12/5.txt new file mode 100644 index 00000000..497f82cd --- /dev/null +++ b/12/5.txt @@ -0,0 +1,2 @@ +# 5.txt +And when this happens, and when we allow freedom ring, when we let it ring from every village and every hamlet, from every state and every city, we will be able to speed up that day when all of God's children, black men and white men, Jews and Gentiles, Protestants and Catholics, will be able to join hands and sing in the words of the old Negro spiritual: "Free at last! Free at last! Thank God Almighty, we are free at last!" \ No newline at end of file diff --git a/12/search.py b/12/search.py index d0fde773..7564abe4 100644 --- a/12/search.py +++ b/12/search.py @@ -4,194 +4,196 @@ # 搜索引擎基类 class SearchEngineBase(object): - def __init__(self): - print("父类") - - def add_corpus(self, file_path): - with open(file_path, "r") as fin: - text = fin.read() - self.process_corpus(file_path, text) - - def process_corpus(self, id, text): - raise Exception("process_corpus未定义") - - def search(self, query): - raise Exception("search未定义") - - + def __init__(self): + print("父类") + + def add_corpus(self, file_path): + with open(file_path, "r") as fin: + text = fin.read() + self.process_corpus(file_path, text) + + def process_corpus(self, id, text): + raise Exception("process_corpus未定义") + + def search(self, query): + raise Exception("search未定义") + + def main(search_engine): - for file_path in ["1.txt", "2.txt", "3.txt", "4.txt"]: - search_engine.add_corpus(file_path) - - while True: - query = input("输入检索词,输q结束:") - if query == "q": - break - results = search_engine.search(query) - print("found {} result(s):".format(len(results))) - - for result in results: - print(result) - - + for file_path in ["1.txt", "2.txt", "3.txt", "4.txt", "5.txt"]: + search_engine.add_corpus(file_path) + + while True: + query = input("输入检索词,输q结束:") + if query == "q": + break + results = search_engine.search(query) + print("found {} result(s):".format(len(results))) + + for result in results: + print(result) + + # 简单的搜索引擎 class SimpleEngine(SearchEngineBase): - def __init__(self): - super(SimpleEngine, self).__init__() - print("子类") - self.__id_to_texts = {} - - def process_corpus(self, id, text): - self.__id_to_texts[id] = text - - def search(self, query): - results = [] - for id, text in self.__id_to_texts.items(): - if query in text: - results.append(id) - return results + def __init__(self): + super(SimpleEngine, self).__init__() + print("子类") + self.__id_to_texts = {} + + def process_corpus(self, id, text): + self.__id_to_texts[id] = text + + def search(self, query): + results = [] + for id, text in self.__id_to_texts.items(): + if query in text: + results.append(id) + return results # 分词的搜索引擎 import re + class BOWEngine(SearchEngineBase): - def __init__(self): - super(BOWEngine, self).__init__() - self.__id_to_word = {} - - def process_corpus(self, id, text): - self.__id_to_word[id] = self.parse_text_to_word(text) - - def search(self, query): - query_words = self.parse_text_to_word(query) - results = [] - for id, words in self.__id_to_word.items(): - if self.query_match(query_words, words): - results.append(id) - return results - - @staticmethod - def parse_text_to_word(text): - # 使用正则表达式去除标点和换行符 - text = re.sub(r'[^\w ]', ' ', text) - # 转为小写 - text = text.lower() - # 生成所有单词的列表 - word_list = text.split(' ') - # 去除空白单词 - word_list = filter(None, word_list) - # 返回单词的set - return set(word_list) - - @staticmethod - def query_match(query_words, words): - for query_word in query_words: - if query_word not in words: - return False - return True + def __init__(self): + super(BOWEngine, self).__init__() + self.__id_to_word = {} + + def process_corpus(self, id, text): + self.__id_to_word[id] = self.parse_text_to_word(text) + + def search(self, query): + query_words = self.parse_text_to_word(query) + results = [] + for id, words in self.__id_to_word.items(): + if self.query_match(query_words, words): + results.append(id) + return results + + @staticmethod + def parse_text_to_word(text): + # 使用正则表达式去除标点和换行符 + text = re.sub(r'[^\w ]', ' ', text) + # 转为小写 + text = text.lower() + # 生成所有单词的列表 + word_list = text.split(' ') + # 去除空白单词 + word_list = filter(None, word_list) + # 返回单词的set + return set(word_list) + + @staticmethod + def query_match(query_words, words): + for query_word in query_words: + if query_word not in words: # 所有的搜索关键词都要出现在同一篇文章中 + return False + return True + # 减少查询的量 class BOWInvertedIndexEngine(SearchEngineBase): - def __init__(self): - super(BOWInvertedIndexEngine, self).__init__() - self.inverted_index = {} - - def process_corpus(self, id, text): - words = self.parse_text_to_word(text) - for word in words: - if word not in self.inverted_index: - self.inverted_index[word] = [] - self.inverted_index[word].append(id) - - def search(self, query): - query_words = list(self.parse_text_to_word(query)) - query_words_index = list() - for query_word in query_words: - query_words_index.append(0) - - # 如果某一单词倒序索引,立即返回 - for query_word in query_words: - if query_word not in self.inverted_index: - return [] - - result = [] - while True: - # 首先获得当前状态下所有倒序索引的index - current_ids = [] - for idx, query_word in enumerate(query_words): - current_index = query_words_index[idx] - current_inverted_list = self.inverted_index[query_word] - # 已经遍历到某个倒序索引的末尾,结束 - if current_index>= len(current_inverted_list): - return result - current_ids.append(current_inverted_list[current_index]) - - # 然后,如果 current_ids 的所有元素都一样,那么表明这个单词在这个元素对应的文档中都出现了 - if all(x == current_ids[0] for x in current_ids): - result.append(current_ids[0]) - query_words_index = [x+1 for x in query_words_index] - continue - - # 如果不是,把最小元素加1 - min_val = min(current_ids) - min_val_pos = current_ids.index(min_val) - query_words_index[min_val_pos] += 1 - - @staticmethod - def parse_text_to_word(text): - # 使用正则表达式去除标点和换行符 - text = re.sub(r'[^\w ]', ' ', text) - # 转为小写 - text = text.lower() - # 生成所有单词的列表 - word_list = text.split(' ') - # 去除空白单词 - word_list = filter(None, word_list) - # 返回单词的set - return set(word_list) + def __init__(self): + super(BOWInvertedIndexEngine, self).__init__() + self.inverted_index = {} + + def process_corpus(self, id, text): + words = self.parse_text_to_word(text) + for word in words: + if word not in self.inverted_index: + self.inverted_index[word] = [] + self.inverted_index[word].append(id) + + def search(self, query): + query_words = list(self.parse_text_to_word(query)) + query_words_index = list() + for query_word in query_words: + query_words_index.append(0) + + # 如果某一单词倒序索引,立即返回 + for query_word in query_words: + if query_word not in self.inverted_index: + return [] + + result = [] + while True: + # 首先获得当前状态下所有倒序索引的index + current_ids = [] + for idx, query_word in enumerate(query_words): + current_index = query_words_index[idx] + current_inverted_list = self.inverted_index[query_word] + # 已经遍历到某个倒序索引的末尾,结束 + if current_index>= len(current_inverted_list): + return result + current_ids.append(current_inverted_list[current_index]) + + # 然后,如果 current_ids 的所有元素都一样,那么表明这个单词在这个元素对应的文档中都出现了 + if all(x == current_ids[0] for x in current_ids): + result.append(current_ids[0]) + query_words_index = [x + 1 for x in query_words_index] + continue + + # 如果不是,把最小元素加1 + min_val = min(current_ids) + min_val_pos = current_ids.index(min_val) + query_words_index[min_val_pos] += 1 + + @staticmethod + def parse_text_to_word(text): + # 使用正则表达式去除标点和换行符 + text = re.sub(r'[^\w ]', ' ', text) + # 转为小写 + text = text.lower() + # 生成所有单词的列表 + word_list = text.split(' ') + # 去除空白单词 + word_list = filter(None, word_list) + # 返回单词的set + return set(word_list) # 缓存和多重继承 import pylru + class LRUCache(object): - def __init__(self, size = 2): - self.cache = pylru.lrucache(size) - - def has(self, key): - return key in self.cache - - def get(self, key): - return self.cache[key] - - def set(self, key, value): - self.cache[key] = value - + def __init__(self, size=2): + self.cache = pylru.lrucache(size) + + def has(self, key): + return key in self.cache + + def get(self, key): + return self.cache[key] + + def set(self, key, value): + self.cache[key] = value + class BOWInvertedIndexEngineWithCache(BOWInvertedIndexEngine, LRUCache): - def __init__(self): - super(BOWInvertedIndexEngineWithCache, self).__init__() - LRUCache.__init__(self) - - def search(self, query): - if self.has(query): - print("缓存命中!") - return self.get(query) - - result = super(BOWInvertedIndexEngineWithCache, self).search(query) - self.set(query, result) - - return result + def __init__(self): + super(BOWInvertedIndexEngineWithCache, self).__init__() + LRUCache.__init__(self) + + def search(self, query): + if self.has(query): + print("缓存命中!") + return self.get(query) + + result = super(BOWInvertedIndexEngineWithCache, self).search(query) + self.set(query, result) + + return result if __name__ == "__main__": - # search_engine = SimpleEngine() - # main(search_engine) - # search_engine = BOWEngine() - # main(search_engine) - # search_engine = BOWInvertedIndexEngine() - # main(search_engine) - search_engine = BOWInvertedIndexEngineWithCache() - main(search_engine) - \ No newline at end of file + # search_engine = SimpleEngine() + # main(search_engine) + # search_engine = BOWEngine() + # main(search_engine) + # search_engine = BOWInvertedIndexEngine() + # main(search_engine) + search_engine = BOWInvertedIndexEngineWithCache() + main(search_engine) diff --git a/13/main.py b/13/main.py index 5036c9d5..e618045a 100644 --- a/13/main.py +++ b/13/main.py @@ -9,11 +9,10 @@ if __name__ == "__main__": - print(get_sum(1, 2)) - - encoder = Encoder() - decoder = Decoder() - - print(encoder.encode("abcde")) - print(decoder.decode("edcba")) - \ No newline at end of file + print(get_sum(1, 2)) + + encoder = Encoder() + decoder = Decoder() + + print(encoder.encode("abcde")) + print(decoder.decode("edcba")) diff --git a/13/src/sub_main.py b/13/src/sub_main.py index 70644257..a6b3ba66 100644 --- a/13/src/sub_main.py +++ b/13/src/sub_main.py @@ -2,18 +2,17 @@ # 第13课 Python模块化 import sys + sys.path.append("..") from utils.class_utils import * from utils.utils import * - if __name__ == "__main__": - print(get_sum(1, 2)) - - encoder = Encoder() - decoder = Decoder() - - print(encoder.encode("abcde")) - print(decoder.decode("edcba")) - \ No newline at end of file + print(get_sum(1, 2)) + + encoder = Encoder() + decoder = Decoder() + + print(encoder.encode("abcde")) + print(decoder.decode("edcba")) diff --git a/13/test1/proto/mat.py b/13/test1/proto/mat.py index cbf3786d..5000c8ef 100644 --- a/13/test1/proto/mat.py +++ b/13/test1/proto/mat.py @@ -3,7 +3,7 @@ # /proto/mat.py class Matrix(object): - def __init__(self, data): - self.data = data - self.n = len(data) - self.m = len(data[0]) + def __init__(self, data): + self.data = data + self.n = len(data) + self.m = len(data[0]) diff --git a/13/test1/src/main.py b/13/test1/src/main.py index 6baac0d9..4fa6390f 100644 --- a/13/test1/src/main.py +++ b/13/test1/src/main.py @@ -2,11 +2,13 @@ # 第13课 Python模块化 # src/main.py import sys + +print(sys.path) sys.path.append("..") from proto.mat import Matrix from utils.mat_mul import mat_mul -a = Matrix([[1,2], [3,4]]) -b = Matrix([[5,6], [7,8]]) +a = Matrix([[1, 2], [3, 4]]) +b = Matrix([[5, 6], [7, 8]]) print(mat_mul(a, b).data) \ No newline at end of file diff --git a/13/test1/utils/mat_mul.py b/13/test1/utils/mat_mul.py index 2b48a7e5..31173d17 100644 --- a/13/test1/utils/mat_mul.py +++ b/13/test1/utils/mat_mul.py @@ -5,12 +5,12 @@ from proto.mat import Matrix def mat_mul(matrix_1: Matrix, matrix_2: Matrix): - assert matrix_1.m == matrix_2.n - n, m, s = matrix_1.n, matrix_1.m, matrix_2.m - result = [[0 for _ in range(n)] for _ in range(s)] - for i in range(n): - for j in range(s): - for k in range(m): - result[i][k] += matrix_1.data[i][j] * matrix_2.data[j][k] - - return Matrix(result) \ No newline at end of file + assert matrix_1.m == matrix_2.n + n, m, s = matrix_1.n, matrix_1.m, matrix_2.m + result = [[0 for _ in range(n)] for _ in range(s)] + for i in range(n): + for j in range(s): + for k in range(m): + result[i][k] += matrix_1.data[i][j] * matrix_2.data[j][k] + + return Matrix(result) diff --git a/13/utils/class_utils.py b/13/utils/class_utils.py index 4bc94506..cd77556a 100644 --- a/13/utils/class_utils.py +++ b/13/utils/class_utils.py @@ -2,11 +2,10 @@ # 第13课 Python模块化 class Encoder(object): - def encode(self, s): - return s[::-1] - - + def encode(self, s): + return s[::-1] + + class Decoder(object): - def decode(self, s): - return ' '.join(reversed(list(s))) - \ No newline at end of file + def decode(self, s): + return ' '.join(reversed(list(s))) diff --git a/13/utils/utils.py b/13/utils/utils.py index 65d1db43..75395718 100644 --- a/13/utils/utils.py +++ b/13/utils/utils.py @@ -2,4 +2,4 @@ # 第13课 Python模块化 def get_sum(a, b): - return a+b + return a + b diff --git a/15/obcopy.py b/15/obcopy.py index 6be03b4e..20326108 100644 --- a/15/obcopy.py +++ b/15/obcopy.py @@ -3,79 +3,102 @@ import copy - if __name__ == "__main__": - a = 2 - b = 2 - print(a == b) - print(a is b) - print("id(a) = {}".format(id(a))) - print("id(b) = {}".format(id(b))) - # 以上只对-5至256的值有效 - a = 10000000 - b = 10000000 - print(a == b) - print(a is b) - print("id(a) = {}".format(id(a))) - print("id(b) = {}".format(id(b))) - - # 对于不可变变量 - t1 = (1, 2, [3, 4]) - t2 = (1, 2, [3, 4]) - print(t1 == t2) - print(id(t1), id(t2)) - t1[-1].append(5) - print(t1 == t2) - print(id(t1), id(t2)) - - # 浅拷贝 - l1 = [1, 2, 3] - l2 = list(l1) - print(l1 == l2) - print(l1 is l2) - s1 = set([1, 2, 3]) - s2 = set(s1) - print(s1, s2) - print(s1 == s2) - print(s1 is s2) - # 通过切片操作 - l1 = [1, 2, 3] - l2 = l1[:] - print(l1 == l2) - print(l1 is l2) - # 使用copy函数 - l2 = copy.copy(l1) - print(l1 == l2) - print(l1 is l2) - # 元组的不同,返回一个指向元组的引用 - t1 = (1,2,3) - t2 = tuple(t1) - print(t1 == t2) - print(t1 is t2) - - # 浅拷贝的副作用 - l1 = [[1, 2], (30, 40)] - l2 = list(l1) - l1.append(100) - l1[0].append(3) - print(l1) - print(l2) - l1[1] += (50, 60) - print(l1) - print(l2) - - # 深拷贝 - l1 = [[1, 2], (30, 40)] - l2 = copy.deepcopy(l1) - l1.append(100) - l1[0].append(3) - print(l1, l2) - # 陷入无限循环的深拷贝 - x = [1] - x.append(x) - print(x) - y = copy.deepcopy(x) - print(y) - # 思考题 - # print(x == y) #报错 - print(x is y) \ No newline at end of file + a = 2 + b = 2 + print(a == b) + print(a is b) + print("id(a) = {}".format(id(a))) + print("id(b) = {}".format(id(b))) + + print("\n################################\n") + + # 以上只对-5至256的值有效 + a = 10000000 + b = 10000000 + print(a == b) + print(a is b) + print("id(a) = {}".format(id(a))) + print("id(b) = {}".format(id(b))) + + print("\n################################\n") + + # 对于不可变变量 + t1 = (1, 2, [3, 4]) + t2 = (1, 2, [3, 4]) + print(t1 == t2) + print(id(t1), id(t2)) + t1[-1].append(5) + print(t1, t2) + print(t1 == t2) + print(id(t1), id(t2)) + + print("\n################################\n") + + # 浅拷贝 + l1 = [1, 2, 3] + l2 = list(l1) + print(l1 == l2) + print(l1 is l2) + s1 = set([1, 2, 3]) + s2 = set(s1) + print(s1, s2) + print(s1 == s2) + print(s1 is s2) + + print("\n################################\n") + + # 通过切片操作 + l1 = [1, 2, 3] + l2 = l1[:] + print(l1 == l2) + print(l1 is l2) + + print("\n################################\n") + + # 使用copy函数 + l2 = copy.copy(l1) + print(l1 == l2) + print(l1 is l2) + + print("\n################################\n") + + # 元组的不同,返回一个指向元组的引用 + t1 = (1, 2, 3) + t2 = tuple(t1) + print(t1 == t2) + print(t1 is t2) + + print("\n################################\n") + + # 浅拷贝的副作用 + l1 = [[1, 2], (30, 40)] + l2 = list(l1) + l1.append(100) + l1[0].append(3) + print(l1) + print(l2) + l1[1] += (50, 60) + print(l1) + print(l2) + + print("\n################################\n") + + # 深拷贝 + l1 = [[1, 2], (30, 40)] + l2 = copy.deepcopy(l1) + l1.append(100) + l1[0].append(3) + print(l1, l2) + # 陷入无限循环的深拷贝 + x = [1] + x.append(x) + print(x) + y = copy.deepcopy(x) + print(y) + + print("\n################################\n") + + # 思考题 + # print(x == y) #报错 + print(x is y) diff --git a/16/canshu.py b/16/canshu.py index d6d645c4..8919ed5b 100644 --- a/16/canshu.py +++ b/16/canshu.py @@ -3,63 +3,98 @@ if __name__ == "__main__": - # 变量及赋值 - a = 1 - b = a - a = a + 1 - print(a, b) - # 列表赋值 - l1 = [1,2,3] - l2 = l1 - l1.append(4) - print(l1) - print(l2) - - # 函数参数传递 - def my_func1(b): - b = 2 - - a = 1 - my_func1(a) - print(a) - - def my_func2(b): - b = 2 - return b - - a = my_func2(a) - print(a) - # 传入可变对象 - def my_func3(l2): - l2.append(4) - l1 = [1,2,3] - my_func3(l1) - print(l1) - # 参数原值不变 - def my_func4(l2): - l2 = l2 + [4] - l1 = [1,2,3] - my_func4(l1) - print(l1) - # 要改变参数原值的做法 - def my_func5(l2): - l2 = l2 + [4] - return l2 - l1 = [1,2,3] - l1 = my_func5(l1) - print(l1) - - # 思考题1 - l1 = [1,2,3,4] - l2 = [1,2,3,4] - l3 = l2 - print(id(l1), id(l2), id(l3)) - # 思考题2 - def func(d): - d["a"] = 10 - d["b"] = 20 - - d = {"a":1, "b":2} - func(d) - print(d) - \ No newline at end of file + # 变量及赋值 + a = 1 + b = a + a = a + 1 + print(a, b) + + print("\n################################\n") + + # 列表赋值 + l1 = [1, 2, 3] + l2 = l1 + l1.append(4) + print(l1) + print(l2) + + print("\n################################\n") + + + # 函数参数传递 + def my_func1(b): + b = 2 + + + a = 1 + my_func1(a) + print(a) + + print("\n################################\n") + + + def my_func2(b): + b = 2 + return b + + + a = my_func2(a) + print(a) + + print("\n################################\n") + + + # 传入可变对象 + def my_func3(l2): + l2.append(4) + + + l1 = [1, 2, 3] + my_func3(l1) + print(l1) + + print("\n################################\n") + + + # 参数原值不变 + def my_func4(l2): + l2 = l2 + [4] + + + l1 = [1, 2, 3] + my_func4(l1) + print(l1) + + print("\n################################\n") + + + # 要改变参数原值的做法 + def my_func5(l2): + l2 = l2 + [4] + return l2 + + + l1 = [1, 2, 3] + l1 = my_func5(l1) + print(l1) + + print("\n################################\n") + + # 思考题1 + l1 = [1, 2, 3, 4] + l2 = [1, 2, 3, 4] + l3 = l2 + print(id(l1), id(l2), id(l3)) + + print("\n################################\n") + + + # 思考题2 + def func(d): + d["a"] = 10 + d["b"] = 20 + + + d = {"a": 1, "b": 2} + func(d) + print(d) diff --git a/17/zsq.py b/17/zsq.py index fe380bf0..639e1c8f 100644 --- a/17/zsq.py +++ b/17/zsq.py @@ -5,139 +5,191 @@ import functools import time - if __name__ == "__main__": - # 函数作为变量 - def func(message): - print("收到一个消息:{}".format(message)) - - send_message = func - send_message("hello world") - - # 函数作为参数 - def root_call(fun, message): - print(fun(message)) - - root_call(func, "函数参数") - - # 函数嵌套 - def fund(message): - def get_message(message): - print("收到一个消息:{}".format(message)) - return get_message(message) - - fund("函数嵌套") - - # 闭包 - def func_closure(): - def get_message(message): - print("收到一个消息:{}".format(message)) - return get_message - - send_message = func_closure() - send_message("返回函数对象(闭包)") - - # 简单装饰器例子 - def my_decorator(func): - def wrapper(): - print("装饰器") - func() - return wrapper - - def greet(): - print("你好") - - greet = my_decorator(greet) - greet() - - # 原函数还是原函数吗? - print(greet.__name__) - print(help(greet)) - - # 使用functools.wrap - def my_decorator2(func): - @functools.wraps(func) - def wrapper(*args, **kwargs): - print("functools的装饰器") - func(*args, **kwargs) - return wrapper - - @my_decorator2 - def greet2(message): - print(message) - - greet2("functools") - print(greet2.__name__) - - # 类装饰器 - class Count(): - def __init__(self, func): - self.func = func - self.num_calls = 0 - - def __call__(self, *args, **kwargs): - self.num_calls += 1 - print("num of call is: {}".format(self.num_calls)) - return self.func(*args, **kwargs) - - @Count - def example(): - print("类装饰器") - - example() - example() - - # 装饰器嵌套 - def my_decorator_a(func): - @functools.wraps(func) - def wrapper(*args, **kwargs): - print("functools的装饰器a") - func(*args, **kwargs) - return wrapper - - def my_decorator_b(func): - @functools.wraps(func) - def wrapper(*args, **kwargs): - print("functools的装饰器b") - func(*args, **kwargs) - return wrapper - - @my_decorator_a - @my_decorator_b - def greet3(message): - print(message) - - greet3("functools") - print(greet3.__name__) - - # 应用举例 给函数加上计时功能 - def log_execution_time(func): - @functools.wraps(func) - def wrapper(*args, **kwargs): - start = time.perf_counter() - res = func(*args, **kwargs) - end = time.perf_counter() - print("函数{}运行耗时{}秒".format(func.__name__, end-start)) - return res - return wrapper - - @log_execution_time - def add(n): - s = 0 - for i in range(n): - s += i - return s - - res = add(10000) - print(res) - - @log_execution_time - def multiply(n): - s = 1 - for i in range(n): - s = s*(i+1) - return s - - res = multiply(10000) - print(res) - \ No newline at end of file + # 函数作为变量 + def func(message): + print("收到一个消息:{}".format(message)) + + + send_message = func + send_message("hello world") + + print("\n################################\n") + + + # 函数作为参数 + def root_call(fun, message): + print(fun(message)) + + + root_call(func, "函数参数") + + print("\n################################\n") + + + # 函数嵌套 + def fund(message): + def get_message(message): + print("收到一个消息:{}".format(message)) + + return get_message(message) + + + fund("函数嵌套") + + print("\n################################\n") + + + # 闭包 + def func_closure(): + def get_message(message): + print("收到一个消息:{}".format(message)) + + return get_message + + + send_message = func_closure() + send_message("返回函数对象(闭包)") + + print("\n################################\n") + + + # 简单装饰器例子 + def my_decorator(func): + def wrapper(): + print("装饰器") + func() + + return wrapper + + + def greet(): + print("你好") + + + greet = my_decorator(greet) + greet() + + print("\n################################\n") + + # 原函数还是原函数吗? + print(greet.__name__) + print(help(greet)) + + print("\n################################\n") + + + # 使用functools.wrap + def my_decorator2(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + print("functools的装饰器") + func(*args, **kwargs) + + return wrapper + + + @my_decorator2 + def greet2(message): + print(message) + + + greet2("functools") + print(greet2.__name__) + + print("\n################################\n") + + + # 类装饰器 + class Count(): + def __init__(self, func): + self.func = func + self.num_calls = 0 + + def __call__(self, *args, **kwargs): + self.num_calls += 1 + print("num of call is: {}".format(self.num_calls)) + return self.func(*args, **kwargs) + + + @Count + def example(): + print("类装饰器") + + + example() + example() + + print("\n################################\n") + + + # 装饰器嵌套 + def my_decorator_a(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + print("functools的装饰器a") + func(*args, **kwargs) + + return wrapper + + + def my_decorator_b(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + print("functools的装饰器b") + func(*args, **kwargs) + + return wrapper + + + @my_decorator_a + @my_decorator_b + def greet3(message): + print(message) + + + greet3("functools") + print(greet3.__name__) + + print("\n################################\n") + + + # 应用举例 给函数加上计时功能 + def log_execution_time(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + start = time.perf_counter() + res = func(*args, **kwargs) + end = time.perf_counter() + print("函数{}运行耗时{}秒".format(func.__name__, end - start)) + return res + + return wrapper + + + @log_execution_time + def add(n): + s = 0 + for i in range(n): + s += i + return s + + + res = add(10000) + print(res) + + print("\n################################\n") + + + @log_execution_time + def multiply(n): + s = 1 + for i in range(n): + s = s * (i + 1) + return s + + + res = multiply(10000) + print(res) + + print("\n################################\n") diff --git a/18/metaclass.py b/18/metaclass.py index db720284..c0ee88f0 100644 --- a/18/metaclass.py +++ b/18/metaclass.py @@ -6,70 +6,106 @@ class Monster(yaml.YAMLObject): - yaml_tag = "Monster" - def __init__(self, name, hp, ac, attacks): - self.name = name - self.hp = hp - self.ac = ac - self.attacks = attacks - - def __repr__(self): - return "{}(name = {}, hp = {}, ac = {}, attacks = {}".format(self.__class__.__name__, self.name, self.hp, self.ac, self.attacks) - + yaml_tag = "Monster" + + def __init__(self, name, hp, ac, attacks): + self.name = name + self.hp = hp + self.ac = ac + self.attacks = attacks + + def __repr__(self): + return "{}(name = {}, hp = {}, ac = {}, attacks = {}".format(self.__class__.__name__, self.name, self.hp, + self.ac, self.attacks) + if __name__ == "__main__": - Monster(name = "zym", hp = [2, 6], ac = 16, attacks = ["BITE", "HURT"]) - print(yaml.dump(Monster(name = "zym2", hp = [3, 6], ac = 18, attacks = ["BITE", "HURT"]))) - - # 所有用户自定义类,是type的实例 - class MyClass: - pass - - instance = MyClass() - print(type(instance)) - print(type(MyClass)) - # 用户自定义类,是type类的__call__运算符重载 - class MyClass2: - data = 1 - - instance = MyClass2() - print(MyClass2, instance, instance.data) - - MyClass = type("MyClass", (), {"data":1}) - instance = MyClass() - print(MyClass, instance, instance.data) - - # 网友的例子 - class MyMeta(type): - def __init__(self, name, bases, dic): - super().__init__(name, bases, dic) - print("===>MyMeta.__init__") - print(self.__name__) - print(dic) - print(self.yaml_tag) - - def __new__(cls, *args, **kwargs): - print("===>MyMeta.__new__") - print(cls.__name__) - return type.__new__(cls, *args, **kwargs) - - def __call__(cls, *args, **kwargs): - print("===>MyMeta.__call__") - obj = cls.__new__(cls) - cls.__init__(cls, *args, **kwargs) - return obj - - - class Foo(metaclass=MyMeta): - yaml_tag = "!Foo" - - def __init__(self, name): - print("Foo.__init__") - self.name = name - - def __new__(cls, *args, **kwargs): - print("Foo.__new__") - return object.__new__(cls) - - foo = Foo("foo") - \ No newline at end of file + Monster(name="zym", hp=[2, 6], ac=16, attacks=["BITE", "HURT"]) + print(yaml.dump(Monster(name="zym2", hp=[3, 6], ac=18, attacks=["BITE", "HURT"]))) + + print("\n################################\n") + + monster = yaml.load( + """ + ! + ac: 32 + attacks: + - BITE + - HURT + hp: + - 23 + - 86 + name: BurgessLee + """ + , Loader=yaml.FullLoader + ) + + print(yaml.dump(monster)) + + print("\n################################\n") + + + # 所有用户自定义类,是type的实例 + class MyClass: + pass + + + instance = MyClass() + print(type(instance)) + print(type(MyClass)) + + print("\n################################\n") + + + # 用户自定义类,是type类的__call__运算符重载 + class MyClass2: + data = 1 + + + instance = MyClass2() + print(MyClass2 == type(instance)) + print(MyClass2, instance, instance.data) + + print("\n################################\n") + + MyClass = type("MyClass", (), {"data": 1}) + instance = MyClass() + print(MyClass, instance, instance.data) + + print("\n################################\n") + + + # 网友的例子 + class MyMeta(type): + def __init__(self, name, bases, dic): + super().__init__(name, bases, dic) + print("===>MyMeta.__init__") + print(self.__name__) + print(dic) + print(self.yaml_tag) + + def __new__(cls, *args, **kwargs): + print("===>MyMeta.__new__") + print(cls.__name__) + return type.__new__(cls, *args, **kwargs) + + def __call__(cls, *args, **kwargs): + print("===>MyMeta.__call__") + obj = cls.__new__(cls) + cls.__init__(cls, *args, **kwargs) + return obj + + + class Foo(metaclass=MyMeta): + yaml_tag = "!Foo" + + def __init__(self, name): + print("Foo.__init__") + self.name = name + + def __new__(cls, *args, **kwargs): + print("Foo.__new__") + return object.__new__(cls) + + + foo = Foo("foo") diff --git a/19/diedai.py b/19/diedai.py index 8e93b0b4..b23e0909 100644 --- a/19/diedai.py +++ b/19/diedai.py @@ -7,129 +7,148 @@ import functools import time - if __name__ == "__main__": - # 判断一个对象是否可迭代 - def is_iterable(param): - try: - iter(param) - return True - except TypeError: - return False - - params = [ - 1234, - '1234', - [1, 2, 3, 4], - set([1, 2, 3, 4]), - {1:1, 2:2, 3:3, 4:4}, - (1, 2, 3, 4) - ] - for param in params: - print("{} is iterable? {}".format(param, is_iterable(param))) - - # 生成器 - def show_memory_info(hint): - pid = os.getpid() - p = psutil.Process(pid) - - info = p.memory_full_info() - memory = info.uss / 1024. /1024 - print("{} memory used: {}MB".format(hint, memory)) - - def log_execution_time(func): - @functools.wraps(func) - def wrapper(*args, **kwargs): - start = time.perf_counter() - res = func(*args, **kwargs) - end = time.perf_counter() - print("函数{}运行耗时{}秒".format(func.__name__, end-start)) - return res - return wrapper - - @log_execution_time - def test_iterator(): - show_memory_info("初始化迭代器") - list1 = [i for i in range(10000000)] - show_memory_info("初始化迭代器以后") - print(sum(list1)) - show_memory_info("调用sum以后") - - @log_execution_time - def test_generator(): - show_memory_info("初始化生成器") - list2 = (i for i in range(10000000)) - show_memory_info("初始化生成器以后") - print(sum(list2)) - show_memory_info("调用sum以后") - - test_iterator() - test_generator() - - # 使用生成器 - def generator(k): - i = 1 - while True: - yield i**k - i += 1 - - gen_1 = generator(1) - gen_3 = generator(3) - - def get_sum(n): - sum_1, sum_3 = 0, 0 - for i in range(n): - next_1 = next(gen_1) - next_3 = next(gen_3) - print("next_1={}, next_3={}".format(next_1, next_3)) - sum_1 += next_1 - sum_3 += next_3 - print(sum_1*sum_1, sum_3) - - get_sum(8) - - # 生成器的另一个例子,找指定元素在列表中的位置 - def index_generator(L, target): - for i, num in enumerate(L): - if num == target: - yield i - - print(list(index_generator([1, 6, 2, 4, 5, 2, 8, 6, 3, 2], 2))) - - # 给定两个有序序列,判断第一个是不是第二个的子序列 - def is_subsequence(a, b): - b = iter(b) - return all(i in b for i in a) - - print(is_subsequence([1,3,5], [1,2,3,4,5])) - print(is_subsequence([1,4,3], [1,2,3,4,5])) - - # 将上面的代码复杂化 - def is_subsequence2(a, b): - b = iter(b) - print(b) - - gen = (i for i in a) - print(gen) - - for i in gen: - print(i) - - gen = ((i in b) for i in a) - print(gen) - - for i in gen: - print(i) - - return all((i in b) for i in a) - - print(is_subsequence2([1,3,5], [1,2,3,4,5])) - print(is_subsequence2([1,4,3], [1,2,3,4,5])) - - # 思考题 有限元素生成器无限迭代 - gen = (i for i in range(5)) - for i in range(10): - print(next(gen)) - - - \ No newline at end of file + # 判断一个对象是否可迭代 + def is_iterable(param): + try: + iter(param) + return True + except TypeError: + return False + + + params = [ + 1234, + '1234', + [1, 2, 3, 4], + set([1, 2, 3, 4]), + {1: 1, 2: 2, 3: 3, 4: 4}, + (1, 2, 3, 4) + ] + for param in params: + print("{} is iterable? {}".format(param, is_iterable(param))) + + print("\n################################\n") + + + # 生成器 + def show_memory_info(hint): + pid = os.getpid() + p = psutil.Process(pid) + + info = p.memory_full_info() + memory = info.uss / 1024. / 1024 + print("{} memory used: {}MB".format(hint, memory)) + + + def log_execution_time(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + start = time.perf_counter() + res = func(*args, **kwargs) + end = time.perf_counter() + print("函数{}运行耗时{}秒".format(func.__name__, end - start)) + return res + + return wrapper + + + @log_execution_time + def test_iterator(): + show_memory_info("初始化迭代器") + list1 = [i for i in range(10000000)] + show_memory_info("初始化迭代器以后") + print(sum(list1)) + show_memory_info("调用sum以后") + + + @log_execution_time + def test_generator(): + show_memory_info("初始化生成器") + list2 = (i for i in range(10000000)) + show_memory_info("初始化生成器以后") + print(sum(list2)) + show_memory_info("调用sum以后") + + + test_iterator() + test_generator() + + print("\n################################\n") + + + # 使用生成器 + def generator(k): + i = 1 + while True: + yield i ** k + i += 1 + + + gen_1 = generator(1) + gen_3 = generator(3) + + + def get_sum(n): + sum_1, sum_3 = 0, 0 + for i in range(n): + next_1 = next(gen_1) + next_3 = next(gen_3) + print("next_1={}, next_3={}".format(next_1, next_3)) + sum_1 += next_1 + sum_3 += next_3 + print(sum_1 * sum_1, sum_3) + + + get_sum(8) + + print("\n################################\n") + + + # 生成器的另一个例子,找指定元素在列表中的位置 + def index_generator(L, target): + for i, num in enumerate(L): + if num == target: + yield i + + + print(list(index_generator([1, 6, 2, 4, 5, 2, 8, 6, 3, 2], 2))) + + + # 给定两个有序序列,判断第一个是不是第二个的子序列 + def is_subsequence(a, b): + b = iter(b) + return all(i in b for i in a) + + + print(is_subsequence([1, 3, 5], [1, 2, 3, 4, 5])) + print(is_subsequence([1, 4, 3], [1, 2, 3, 4, 5])) + + + # 将上面的代码复杂化 + def is_subsequence2(a, b): + b = iter(b) + print(b) + + gen = (i for i in a) + print(gen) + + for i in gen: + print(i) + + gen = ((i in b) for i in a) + print(gen) + + for i in gen: + print(i) + + return all((i in b) for i in a) + + + print(is_subsequence2([1, 3, 5], [1, 2, 3, 4, 5])) + print(is_subsequence2([1, 4, 3], [1, 2, 3, 4, 5])) + + # 思考题 有限元素生成器无限迭代 + gen = (i for i in range(5)) + for i in range(10): + print(next(gen)) diff --git a/20/web_crawl.py b/20/web_crawl.py index 46ba946c..2b28fbac 100644 --- a/20/web_crawl.py +++ b/20/web_crawl.py @@ -9,39 +9,38 @@ async def fetch_content(url): - async with aiohttp.ClientSession(connector = aiohttp.TCPConnector(ssl=False)) as session: - async with session.get(url) as response: - return await response.text() + async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session: + async with session.get(url) as response: + return await response.text() async def main(): - url = "https://movie.douban.com/cinema/later/beijing/" - init_page = await fetch_content(url) - init_soup = BeautifulSoup(init_page, 'lxml') - - movie_names, urls_to_fetch, movie_dates = [], [], [] - - all_movies = init_soup.find("div", id = "showing-soon") - - for each_movie in all_movies.find_all("div", class_ = "item"): - all_a_tag = each_movie.find_all('a') - all_li_tag = each_movie.find_all("li") - - movie_names.append(all_a_tag[1].text) - urls_to_fetch.append(all_a_tag[1]["href"]) - movie_dates.append(all_li_tag[0].text) - - tasks = [fetch_content(url) for url in urls_to_fetch] - pages = await asyncio.gather(*tasks) - - for movie_name, movie_date, page in zip(movie_names, movie_dates, pages): - soup_item = BeautifulSoup(page, "lxml") - img_tag = soup_item.find("img") - print("{} {} {}".format(movie_name, movie_date, img_tag["src"])) - + url = "https://movie.douban.com/cinema/later/beijing/" + init_page = await fetch_content(url) + init_soup = BeautifulSoup(init_page, 'html.parser') + + movie_names, urls_to_fetch, movie_dates = [], [], [] + + all_movies = init_soup.find("div", id="showing-soon") + for each_movie in all_movies.find_all("div", class_="item"): + all_a_tag = each_movie.find_all('a') + all_li_tag = each_movie.find_all("li") + + movie_names.append(all_a_tag[1].text) + urls_to_fetch.append(all_a_tag[1]["href"]) + movie_dates.append(all_li_tag[0].text) + + tasks = [fetch_content(url) for url in urls_to_fetch] + pages = await asyncio.gather(*tasks) + + for movie_name, movie_date, page in zip(movie_names, movie_dates, pages): + soup_item = BeautifulSoup(page, "html.parser") + img_tag = soup_item.find("img") + print("电影:{}\n上映: {}\n海报: {}\n\n".format(movie_name, movie_date, img_tag['src'])) + if __name__ == "__main__": - start = time.perf_counter() - asyncio.run(main()) - end = time.perf_counter() - print("协程爬虫运行耗时{}秒".format(end-start)) \ No newline at end of file + start = time.perf_counter() + asyncio.run(main()) + end = time.perf_counter() + print("协程爬虫运行耗时{}秒".format(end - start)) diff --git a/20/web_normal.py b/20/web_normal.py index ef5c4346..40a111bd 100644 --- a/20/web_normal.py +++ b/20/web_normal.py @@ -8,30 +8,30 @@ def crawler(): - url = "https://movie.douban.com/cinema/later/beijing/" - init_page = requests.get(url).content - init_soup = BeautifulSoup(init_page, "lxml") - - all_movies = init_soup.find("div", id = "showing-soon") - for each_movie in all_movies.find_all("div", class_ = "item"): - all_a_tag = each_movie.find_all("a") - all_li_tag = each_movie.find_all("li") - - movie_name = all_a_tag[1].text - url_to_fetch = all_a_tag[1]['href'] - movie_date = all_li_tag[0].text - - response_item = requests.get(url_to_fetch).content - soup_item = BeautifulSoup(response_item, "lxml") - img_tag = soup_item.find("img") - - print("{} {} {}".format(movie_name, movie_date, img_tag)) - - -if __name__ == "__main__": - start = time.perf_counter() - crawler() - end = time.perf_counter() - print("常规爬虫运行耗时{}秒".format(end-start)) + url = "https://movie.douban.com/cinema/later/beijing/" + headers = { + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36'} + init_page = requests.get(url, headers=headers).content + init_soup = BeautifulSoup(init_page, "html.parser") + + all_movies = init_soup.find("div", id="showing-soon") + for each_movie in all_movies.find_all("div", class_="item"): + all_a_tag = each_movie.find_all("a") + all_li_tag = each_movie.find_all("li") + + movie_name = all_a_tag[1].text + url_to_fetch = all_a_tag[1]['href'] + movie_date = all_li_tag[0].text + + response_item = requests.get(url_to_fetch, headers=headers).content + soup_item = BeautifulSoup(response_item, "html.parser") + img_tag = soup_item.find("img") - \ No newline at end of file + print("电影:{}\n上映: {}\n海报: {}\n\n".format(movie_name, movie_date, img_tag['src'])) + + +if __name__ == "__main__": + start = time.perf_counter() + crawler() + end = time.perf_counter() + print("常规爬虫运行耗时{}秒".format(end - start)) diff --git a/20/xc.py b/20/xc.py index 3d3d05d8..57a6b168 100644 --- a/20/xc.py +++ b/20/xc.py @@ -9,173 +9,192 @@ def log_execution_time(func): - @functools.wraps(func) - def wrapper(*args, **kwargs): - start = time.perf_counter() - res = func(*args, **kwargs) - end = time.perf_counter() - print("函数{}运行耗时{}秒".format(func.__name__, end-start)) - return res - return wrapper + @functools.wraps(func) + def wrapper(*args, **kwargs): + start = time.perf_counter() + res = func(*args, **kwargs) + end = time.perf_counter() + print("函数{}运行耗时{}秒".format(func.__name__, end - start)) + return res + + return wrapper if __name__ == "__main__": - # 爬虫的例子 - def crawl_page(url): - print("正在爬取{}".format(url)) - sleep_time = int(url.split('_')[-1]) - time.sleep(sleep_time) - print("OK {}".format(url)) - - @log_execution_time - def main(urls): - for url in urls: - crawl_page(url) - - main(["url_1", "url_2", "url_3", "url_4"]) - - # 并发版爬虫,效果与上面一致 - async def crawl_page2(url): - print("正在爬取{}".format(url)) - sleep_time = int(url.split('_')[-1]) - await asyncio.sleep(sleep_time) - print("OK {}".format(url)) - - async def main2(urls): - for url in urls: - await crawl_page2(url) - - start = time.perf_counter() - asyncio.run(main2(["url_1", "url_2", "url_3", "url_4"])) - end = time.perf_counter() - print("2运行耗时{}秒".format(end-start)) - - # 真正的并发版爬虫 - async def crawl_page3(url): - print("正在爬取{}".format(url)) - sleep_time = int(url.split('_')[-1]) - await asyncio.sleep(sleep_time) - print("OK {}".format(url)) - - async def main3(urls): - tasks = [asyncio.create_task(crawl_page3(url)) for url in urls] - for task in tasks: - await task - - start = time.perf_counter() - asyncio.run(main3(["url_1", "url_2", "url_3", "url_4"])) - end = time.perf_counter() - print("3运行耗时{}秒".format(end-start)) - - # task的另一种做法 - async def main4(urls): - tasks = [asyncio.create_task(crawl_page3(url)) for url in urls] - await asyncio.gather(*tasks) - - start = time.perf_counter() - asyncio.run(main4(["url_1", "url_2", "url_3", "url_4"])) - end = time.perf_counter() - print("4运行耗时{}秒".format(end-start)) - - # 协程运行底层 - async def worker_1(): - print("work1开始") - await asyncio.sleep(1) - print("work1结束") - - async def worker_2(): - print("work2开始") - await asyncio.sleep(2) - print("work2结束") - - async def main5(): - print("await之前") - await worker_1() - print("await worker_1之后") - await worker_2() - print("await worker_2之后") - - start = time.perf_counter() - asyncio.run(main5()) - end = time.perf_counter() - print("5运行耗时{}秒".format(end-start)) - - async def main6(): - task1 = asyncio.create_task(worker_1()) - task2 = asyncio.create_task(worker_2()) - print("await之前") - await task1 - print("await worker_1之后") - await task2 - print("await worker_2之后") - - - start = time.perf_counter() - asyncio.run(main6()) - end = time.perf_counter() - print("6运行耗时{}秒".format(end-start)) - - # 限定时间,超出就取消。协程出现错误 - async def worker1(): - await asyncio.sleep(1) - return 1 - - async def worker2(): - await asyncio.sleep(2) - return 2/0 - - async def worker3(): - await asyncio.sleep(3) - return 3 - - async def main7(): - task1 = asyncio.create_task(worker1()) - task2 = asyncio.create_task(worker2()) - task3 = asyncio.create_task(worker3()) - await asyncio.sleep(2) - task3.cancel() - - res = await asyncio.gather(task2, task2, task3, return_exceptions=True) - print(res) - - start = time.perf_counter() - asyncio.run(main7()) - end = time.perf_counter() - print("7运行耗时{}秒".format(end-start)) - - # 生产者消费者模型 - async def consumer(queue, id): - while True: - val = await queue.get() - print("{} get a val:{}".format(id, val)) - await asyncio.sleep(1) - - async def producer(queue, id): - for i in range(5): - val = random.randint(1, 10) - await queue.put(val) - print("{} set a val:{}".format(id, val)) - await asyncio.sleep(1) - - async def main8(): - queue = asyncio.Queue() - - consumer_1 = asyncio.create_task(consumer(queue, "consumer_1")) - consumer_2 = asyncio.create_task(consumer(queue, "consumer_2")) - producer_1 = asyncio.create_task(producer(queue, "producer_1")) - producer_2 = asyncio.create_task(producer(queue, "producer_2")) - - await asyncio.sleep(10) - - consumer_1.cancel() - consumer_2.cancel() - - await asyncio.gather(consumer_1, consumer_2, producer_1, producer_2, return_exceptions = True) - - start = time.perf_counter() - asyncio.run(main8()) - end = time.perf_counter() - print("8运行耗时{}秒".format(end-start)) - - - \ No newline at end of file + # 爬虫的例子 + def crawl_page(url): + print("正在爬取{}".format(url)) + sleep_time = int(url.split('_')[-1]) + time.sleep(sleep_time) + print("OK {}".format(url)) + + + @log_execution_time + def main(urls): + for url in urls: + crawl_page(url) + + + # main(["url_1", "url_2", "url_3", "url_4"]) + + # 并发版爬虫,效果与上面一致 + async def crawl_page2(url): + print("正在爬取{}".format(url)) + sleep_time = int(url.split('_')[-1]) + await asyncio.sleep(sleep_time) + print("OK {}".format(url)) + + + async def main2(urls): + for url in urls: + await crawl_page2(url) + + + # print(crawl_page2('')) + + # start = time.perf_counter() + # asyncio.run(main2(["url_1", "url_2", "url_3", "url_4"])) + # end = time.perf_counter() + # print("2运行耗时{}秒".format(end - start)) + + # 真正的并发版爬虫 + async def crawl_page3(url): + print("正在爬取{}".format(url)) + sleep_time = int(url.split('_')[-1]) + await asyncio.sleep(sleep_time) + print("OK {}".format(url)) + + + async def main3(urls): + tasks = [asyncio.create_task(crawl_page3(url)) for url in urls] + for task in tasks: + await task + + + # start = time.perf_counter() + # asyncio.run(main3(["url_1", "url_2", "url_3", "url_4"])) + # end = time.perf_counter() + # print("3运行耗时{}秒".format(end - start)) + + # task的另一种做法 + async def main4(urls): + tasks = [asyncio.create_task(crawl_page3(url)) for url in urls] + await asyncio.gather(*tasks) + + + # start = time.perf_counter() + # asyncio.run(main4(["url_1", "url_2", "url_3", "url_4"])) + # end = time.perf_counter() + # print("4运行耗时{}秒".format(end - start)) + + # 协程运行底层 + async def worker_1(): + print("work1开始") + await asyncio.sleep(10) + print("work1结束") + + + async def worker_2(): + print("work2开始") + await asyncio.sleep(20) + print("work2结束") + + + async def main5(): + print("await之前") + await worker_1() + print("await worker_1之后") + await worker_2() + print("await worker_2之后") + + + # start = time.perf_counter() + # asyncio.run(main5()) + # end = time.perf_counter() + # print("5运行耗时{}秒".format(end - start)) + + async def main6(): + task1 = asyncio.create_task(worker_1()) + task2 = asyncio.create_task(worker_2()) + print("await之前") + await task1 + print("await worker_1之后") + await task2 + print("await worker_2之后") + + + # start = time.perf_counter() + # asyncio.run(main6()) + # end = time.perf_counter() + # print("6运行耗时{}秒".format(end - start)) + + # 限定时间,超出就取消。协程出现错误 + async def worker1(): + await asyncio.sleep(10) + return 1 + + + async def worker2(): + await asyncio.sleep(20) + return 2 / 0 + + + async def worker3(): + await asyncio.sleep(30) + return 3 + + + async def main7(): + task1 = asyncio.create_task(worker1()) + task2 = asyncio.create_task(worker2()) + task3 = asyncio.create_task(worker3()) + await asyncio.sleep(20) + task3.cancel() + + res = await asyncio.gather(task1, task2, task3, return_exceptions=True) + print(res) + + + # start = time.perf_counter() + # asyncio.run(main7()) + # end = time.perf_counter() + # print("7运行耗时{}秒".format(end - start)) + + # 生产者消费者模型 + async def consumer(queue, id): + while True: + val = await queue.get() + print("{} get a val:{} --- {}".format(id, val, time.perf_counter())) + await asyncio.sleep(10) + + + async def producer(queue, id): + for i in range(5): + val = random.randint(1, 10) + await queue.put(val) + print("{} set a val:{} --- {}".format(id, val, time.perf_counter())) + await asyncio.sleep(10) + + + async def main8(): + queue = asyncio.Queue() + + consumer_1 = asyncio.create_task(consumer(queue, "consumer_1")) + consumer_2 = asyncio.create_task(consumer(queue, "consumer_2")) + producer_1 = asyncio.create_task(producer(queue, "producer_1")) + producer_2 = asyncio.create_task(producer(queue, "producer_2")) + + await asyncio.sleep(60) + + consumer_1.cancel() + consumer_2.cancel() + + await asyncio.gather(consumer_1, consumer_2, producer_1, producer_2, return_exceptions=True) + + + start = time.perf_counter() + asyncio.run(main8()) + end = time.perf_counter() + print("8运行耗时{}秒".format(end - start)) + + exit() diff --git a/21/bf.py b/21/bf.py index b2da7147..c0cfe28a 100644 --- a/21/bf.py +++ b/21/bf.py @@ -10,85 +10,87 @@ # 单线程版下载 def download_one(url): - resp = requests.get(url) - print("read {} from {}".format(len(resp.content), url)) - + resp = requests.get(url) + print("read {} from {}".format(len(resp.content), url)) + def download_all(sites): - for site in sites: - download_one(site) - + for site in sites: + download_one(site) + # 多线程版下载 def download_all_futures(sites): - with concurrent.futures.ThreadPoolExecutor(max_workers = 5) as executor: - executor.map(download_one, sites) - + with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: + executor.map(download_one, sites) + # 并行版 def download_all_futures_bx(sites): - with concurrent.futures.ThreadPoolExecutor() as executor: - executor.map(download_one, sites) - + with concurrent.futures.ProcessPoolExecutor() as executor: + executor.map(download_one, sites) + # 另一种写法的并行版本 def download_all_futures_bx2(sites): - with concurrent.futures.ThreadPoolExecutor() as executor: - to_do = [] - for site in sites: - future = executor.submit(download_one, site) - to_do.append(future) - for future in concurrent.futures.as_completed(to_do): - future.result() + with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: + to_do = [] + for site in sites: + future = executor.submit(download_one, site) + to_do.append(future) + + for future in concurrent.futures.as_completed(to_do): + future.result() if __name__ == "__main__": - sites = [ - 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143655', - 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143656', - 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143657', - 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143658', - 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143659', - 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143660', - 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143661', - 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143662' - ] - try: - start_time = time.perf_counter() - download_all(sites) - end_time = time.perf_counter() - print("单线程版下载了{}个网站,耗时{}".format(len(sites), end_time-start_time)) - - start_time = time.perf_counter() - download_all_futures(sites) - end_time = time.perf_counter() - print("多线程版下载了{}个网站,耗时{}".format(len(sites), end_time-start_time)) - - start_time = time.perf_counter() - download_all_futures_bx(sites) - end_time = time.perf_counter() - print("并行版下载了{}个网站,耗时{}".format(len(sites), end_time-start_time)) - - start_time = time.perf_counter() - download_all_futures_bx2(sites) - end_time = time.perf_counter() - print("另一个并行版下载了{}个网站,耗时{}".format(len(sites), end_time-start_time)) - # 处理requests异常 - except ConnectionError as err: - print(err) - except HTTPError as err: - print(err) - except Timeout as err: - print(err) - # 处理futures异常 - except TooManyRedirects as err: - print(err) - except CancelledError as err: - print(err) - except TimeoutError as err: - print(err) - except BrokenExecutor as err: - print(err) - except: - print("发生错误") - \ No newline at end of file + sites = [ + 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143655', + 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143656', + 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143657', + 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143658', + 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143659', + 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143660', + 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143661', + 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143662' + ] + try: + # start_time = time.perf_counter() + # download_all(sites) + # end_time = time.perf_counter() + # print("单线程版下载了{}个网站,耗时{}".format(len(sites), end_time - start_time)) + + # start_time = time.perf_counter() + # download_all_futures(sites) + # end_time = time.perf_counter() + # print("多线程版下载了{}个网站,耗时{}".format(len(sites), end_time - start_time)) + + # start_time = time.perf_counter() + # download_all_futures_bx(sites) + # end_time = time.perf_counter() + # print("并行版下载了{}个网站,耗时{}".format(len(sites), end_time - start_time)) + + start_time = time.perf_counter() + download_all_futures_bx2(sites) + end_time = time.perf_counter() + print("另一个并行版下载了{}个网站,耗时{}".format(len(sites), end_time - start_time)) + # 处理requests异常 + except ConnectionError as err: + print(err) + except requests.HTTPError as err: + print(err) + except requests.Timeout as err: + print(err) + # 处理futures异常 + except concurrent.futures.TooManyRedirects as err: + print(err) + except concurrent.futures.CancelledError as err: + print(err) + except concurrent.futures.TimeoutError as err: + print(err) + except concurrent.futures.BrokenExecutor as err: + print(err) + except: + print("发生错误") + + exit() \ No newline at end of file diff --git a/22/bf_as.py b/22/bf_as.py index c1ea8572..7803ca38 100644 --- a/22/bf_as.py +++ b/22/bf_as.py @@ -6,98 +6,108 @@ import aiohttp import time import concurrent.futures -import multiprocessing +import pathos.multiprocessing # 异步网页下载 async def download_one(url): - async with aiohttp.ClientSession() as session: - async with session.get(url) as resp: - print("read {} from {}".format(resp.content_length, url)) - - + async with aiohttp.ClientSession() as session: + async with session.get(url) as resp: + print("read {} from {}".format(resp.content_length, url)) + + async def download_all(sites): - tasks = [asyncio.create_task(download_one(site)) for site in sites] - await asyncio.gather(*tasks) - - + tasks = [asyncio.create_task(download_one(site)) for site in sites] + await asyncio.gather(*tasks) + + # 思考题 求列表中元素的整数平方和 常规版本 def cpu_bound(number): - print("number={}, result={}".format(number, sum(i*i for i in range(number)))) - + print("number={}, result={}".format(number, sum(i * i for i in range(number)))) + + def calculate_sums(numbers): - for number in numbers: - cpu_bound(number) - + for number in numbers: + cpu_bound(number) + + def calcuter_normal(numbers): - start_time = time.perf_counter() - calculate_sums(numbers) - end_time = time.perf_counter() - print("普通版本,耗时{}秒".format(end_time-start_time)) - + start_time = time.perf_counter() + calculate_sums(numbers) + end_time = time.perf_counter() + print("普通版本,耗时{}秒".format(end_time - start_time)) + + # 思考题 并行版本 def calculate_sums_future(numbers): - with concurrent.futures.ThreadPoolExecutor() as executor: - executor.map(cpu_bound, numbers) - + with concurrent.futures.ThreadPoolExecutor() as executor: + executor.map(cpu_bound, numbers) + + def calcuter_future(numbers): - start_time = time.perf_counter() - calculate_sums_future(numbers) - end_time = time.perf_counter() - print("多进程版本,耗时{}秒".format(end_time-start_time)) - + start_time = time.perf_counter() + calculate_sums_future(numbers) + end_time = time.perf_counter() + print("多进程版本,耗时{}秒".format(end_time - start_time)) + + # 思考题 动态规划版本 -squ = {} # 用来储存中间结果 +squ = {} # 用来储存中间结果 + + def cpu_dp(number): - result = 0 - for i in range(number): - if i not in squ.keys(): - squ[i] = i*i - result += squ[i] - print("number={}, result={}".format(number, result)) - + result = 0 + for i in range(number): + if i not in squ.keys(): + squ[i] = i * i + result += squ[i] + print("number={}, result={}".format(number, result)) + + def calculate_sums_dp(numbers): - for number in numbers: - cpu_dp(number) - + for number in numbers: + cpu_dp(number) + + def calcuter_dp(numbers): - start_time = time.perf_counter() - calculate_sums_dp(numbers) - end_time = time.perf_counter() - print("动态规划版本,耗时{}秒".format(end_time-start_time)) - + start_time = time.perf_counter() + calculate_sums_dp(numbers) + end_time = time.perf_counter() + print("动态规划版本,耗时{}秒".format(end_time - start_time)) + + # 思考题的multiprocessing版本 def calculate_sums_multiprocessing(numbers): - with multiprocessing.Pool() as pool: - pool.map(cpu_bound, numbers) - + with pathos.multiprocessing.Pool() as pool: + pool.map(cpu_bound, numbers) + + def calcuter_multiprocessing(numbers): - start_time = time.perf_counter() - calculate_sums_multiprocessing(numbers) - end_time = time.perf_counter() - print("multiprocessing版本,耗时{}秒".format(end_time-start_time)) + start_time = time.perf_counter() + calculate_sums_multiprocessing(numbers) + end_time = time.perf_counter() + print("multiprocessing版本,耗时{}秒".format(end_time - start_time)) if __name__ == "__main__": - sites = [ - 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143655', - 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143656', - 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143657', - 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143658', - 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143659', - 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143660', - 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143661', - 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143662' - ] - start_time = time.perf_counter() - asyncio.run(download_all(sites)) - end_time = time.perf_counter() - print("异步版下载了{}个网站,耗时{}".format(len(sites), end_time-start_time)) - - # 思考题 - numbers = [1000000+x for x in range(10)] - calcuter_normal(numbers) - calcuter_future(numbers) - calcuter_dp(numbers) - calcuter_multiprocessing(numbers) - \ No newline at end of file + sites = [ + 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143655', + 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143656', + 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143657', + 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143658', + 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143659', + 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143660', + 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143661', + 'http://www.dapenti.com/blog/readforwx.asp?name=xilei&id=143662' + ] + # start_time = time.perf_counter() + # asyncio.run(download_all(sites)) + # end_time = time.perf_counter() + # print("异步版下载了{}个网站,耗时{}".format(len(sites), end_time - start_time)) + + # 思考题 + numbers = [1000000 + x for x in range(10)] + calcuter_normal(numbers) + calcuter_future(numbers) + calcuter_dp(numbers) + calcuter_multiprocessing(numbers) diff --git a/23/gil.py b/23/gil.py index 46867b6c..69c2fa28 100644 --- a/23/gil.py +++ b/23/gil.py @@ -10,49 +10,61 @@ # 单线程版 def CountDown(n): - while n> 0: - n -= 1 - + while n> 0: + n -= 1 if __name__ == "__main__": - n = 3000000 - start_time = time.perf_counter() - CountDown(n) - end_time = time.perf_counter() - print("n = {},单线程版耗时{}".format(n, end_time-start_time)) - # 多线程版 - start_time = time.perf_counter() - t1 = Thread(target = CountDown, args = [n//2]) - t2 = Thread(target = CountDown, args = [n//2]) - t1.start() - t2.start() - t1.join() - t2.join() - end_time = time.perf_counter() - print("n = {},多线程版耗时{}".format(n, end_time-start_time)) - - # 对象引用计数 - for k in range(100): - a = [] - b = a - print(sys.getrefcount(a)) - - # 线程安全 - n = 0 - def foo(): - global n - n += 1 - - threads = [] - for i in range(100): - t = threading.Thread(target = foo) - threads.append(t) - - for t in threads: - t.start() - - for t in threads: - t.join() - - print(n) \ No newline at end of file + n = 100000000 + + # start_time = time.perf_counter() + # CountDown(n) + # end_time = time.perf_counter() + # print("n = {},单线程版耗时{}".format(n, end_time - start_time)) + + # 多线程版 + # start_time = time.perf_counter() + # t1 = Thread(target=CountDown, args=[n // 4]) + # t2 = Thread(target=CountDown, args=[n // 4]) + # t3 = Thread(target=CountDown, args=[n // 4]) + # t4 = Thread(target=CountDown, args=[n // 4]) + # t1.start() + # t2.start() + # t3.start() + # t4.start() + # t1.join() + # t2.join() + # t3.join() + # t4.join() + # end_time = time.perf_counter() + # print("n = {},多线程版耗时{}".format(n, end_time - start_time)) + + # 对象引用计数 + # a = [] + # b = a + # print(sys.getrefcount(a)) + + # 线程安全 + n = 0 + + lock = threading.Lock() + + + def foo(): + global n + with lock: + n += 1 + + + threads = [] + for i in range(100): + t = threading.Thread(target=foo) + threads.append(t) + + for t in threads: + t.start() + + for t in threads: + t.join() + + print(n) diff --git a/24/garbage.py b/24/garbage.py index 1e81eba2..215f415c 100644 --- a/24/garbage.py +++ b/24/garbage.py @@ -9,84 +9,90 @@ import objgraph +# 显示当前 python 程序占用的内存大小 def show_memory_info(hint): - pid = os.getpid() - p = psutil.Process(pid) - - info = p.memory_full_info() - memory = info.uss / 1024. / 1024 - print("{}内存使用了{}MB".format(hint, memory)) - - + pid = os.getpid() + p = psutil.Process(pid) + + info = p.memory_full_info() + memory = info.uss / 1024. / 1024 + print("{},内存使用了{}MB".format(hint, memory)) + + def func(): - show_memory_info("局部变量初始化") - a = [i for i in range(10000000)] - show_memory_info("局部变量创建后") - return a - + show_memory_info("局部变量初始化") + a = [i for i in range(10000000)] + show_memory_info("局部变量创建后") + return a + def func2(): - show_memory_info("全局变量初始化") - global a - a = [i for i in range(10000000)] - show_memory_info("全局变量创建后") - - + show_memory_info("全局变量初始化") + global a + a = [i for i in range(10000000)] + show_memory_info("全局变量创建后") + + # python内部引用计数 def jishu(): - a = [] - print(sys.getrefcount(a)) - - def func(a): - print(sys.getrefcount(a)) - - func(a) - print(sys.getrefcount(a)) + a = [] + # 两次引用,一次来自 a,一次来自 getrefcount + print(sys.getrefcount(a)) + + def func(a): + # 四次引用,a,python 的函数调用栈,函数参数,和 getrefcount + print(sys.getrefcount(a)) + + func(a) + # 两次引用,一次来自 a,一次来自 getrefcount,函数 func 调用已经不存在 + print(sys.getrefcount(a)) # 循环引用 def func3(): - show_memory_info("循环引用初始化") - a = [i for i in range(10000000)] - b = [i for i in range(10000000)] - show_memory_info("ab创建完成") - a.append(b) - b.append(a) - + show_memory_info("循环引用初始化") + a = [i for i in range(10000000)] + b = [i for i in range(10000000)] + show_memory_info("ab创建完成") + a.append(b) + b.append(a) + if __name__ == "__main__": - func() - show_memory_info("局部变量完成后") - func2() - show_memory_info("全局变量完成后") - l = func() - show_memory_info("列表变量完成后") - jishu() - - print("手动回收垃圾") - show_memory_info("初始化前") - a = [i for i in range(10000000)] - show_memory_info("初始化后") - - del a - gc.collect() - - show_memory_info("完成") - # print(a) - - # 循环引用 - func3() - show_memory_info("循环引用完成") - gc.collect() - show_memory_info("手动垃圾回收完成") - - # objgraph - a = [1,2,3] - b = [4,5,6] - - a.append(b) - b.append(a) - - objgraph.show_refs([a], filename = "objref.png") - objgraph.show_backrefs([a], filename = "backref.png" ) - \ No newline at end of file + # func() + # show_memory_info("局部变量完成后") + + # func2() + # show_memory_info("全局变量完成后") + + # l = func() + # show_memory_info("列表变量完成后") + + # jishu() + + # print("手动回收垃圾>>>>") + # show_memory_info("初始化前") + # a = [i for i in range(10000000)] + # show_memory_info("初始化后") + # + # del a + # gc.collect() + # + # show_memory_info("完成") + # print(a) + + # 循环引用 + # func3() + # show_memory_info("循环引用完成") + # gc.collect() + # show_memory_info("手动垃圾回收完成") + + # objgraph + a = [1, 2, 3] + b = [4, 5, 6] + + a.append(b) + b.append(a) + + objgraph.show_refs([a], filename="objref.png") + objgraph.show_backrefs([a], filename="backref.png") diff --git a/28/assert.py b/28/assert.py index 1dc08c9d..419b45de 100644 --- a/28/assert.py +++ b/28/assert.py @@ -3,18 +3,24 @@ def apply_discount(price, discount): - updated_price = price * (1 - discount) - assert 0 <= updated_price <= price, "价格必须大于等于0小于原价" - return updated_price - - + updated_price = price * (1 - discount) + assert 0 <= updated_price <= price, "价格必须大于等于0小于原价" + return updated_price + + def calculate_average_price(total_sales, num_sales): - assert num_sales> 0, "销售数量必须大于0" - return total_sales / num_sales - + assert num_sales> 0, "销售数量必须大于0" + return total_sales / num_sales + if __name__ == "__main__": - print(apply_discount(100, 0.2)) - # print(apply_discount(100, 2)) - print(calculate_average_price(100, 2)) - print(calculate_average_price(100, -2)) \ No newline at end of file + if __debug__: + print("__debug__") + else: + print("__release__") + + print(apply_discount(100, 0.2)) + # print(apply_discount(100, 2)) + + print(calculate_average_price(100, 2)) + # print(calculate_average_price(100, -2)) diff --git a/29/with.py b/29/with.py index 30e9aa0f..1b9206b8 100644 --- a/29/with.py +++ b/29/with.py @@ -1,32 +1,71 @@ # coding:utf-8 # 第29课 资源上下文,with +from contextlib import contextmanager + # 基于类的上下文管理器 class FileManager: - def __init__(self, name, mode): - print("__init__") - self.name = name - self.mode = mode - - def __enter__(self): - print("enter") - self.file = open(self.name, self.mode) - return self.file - - def __exit__(self, exc_type, exc_val, exc_tb): - print("exit") - if self.file: - self.file.close() - + def __init__(self, name, mode): + print("__init__") + self.name = name + self.mode = mode + + def __enter__(self): + print("__enter__") + self.file = open(self.name, self.mode) + return self.file + + def __exit__(self, exc_type, exc_val, exc_tb): + print("__exit__") + if self.file: + self.file.close() + + +class Foo: + def __init__(self): + print('__init__ called') + + def __enter__(self): + print('__enter__ called') + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + print('__exit__ called') + if exc_type: + print(f'exc_type: {exc_type}') + print(f'exc_value: {exc_val}') + print(f'exc_traceback: {exc_tb}') + print('exception handled') + return True + if __name__ == "__main__": - for i in range(10000): - with open("test.txt", "w") as f: - f.write("hello") - - with FileManager("text.txt", "w") as f: - print("准备写文件") - f.write("hello 2") - - \ No newline at end of file + # for x in range(10000000): + # f = open("test.txt", "w") + # f.write('hello') + + # for i in range(10000): + # with open("test.txt", "w") as f: + # f.write("hello") + + # with FileManager("text.txt", "w") as f: + # print("准备写文件") + # f.write("hello 2") + + # with Foo() as obj: + # raise Exception('exception raised').with_traceback(None) + + @contextmanager + def file_manager(name, mode): + try: + f = open(name, mode) + yield f + finally: + f.close() + + + with file_manager('test.txt', 'w') as f: + f.write('hello python') + + exit() diff --git a/30/test.py b/30/test.py index 7229376d..e5a80faf 100644 --- a/30/test.py +++ b/30/test.py @@ -8,56 +8,57 @@ # 将要测试的排序函数 def sort(arr): - l = len(arr) - for i in range(0, l): - for j in range(i + 1, l): - if arr[i]>= arr[j]: - tmp = arr[i] - arr[i] = arr[j] - arr[j] = tmp - - -# 单元测试 + l = len(arr) + for i in range(0, l): + for j in range(i + 1, l): + if arr[i]>= arr[j]: + tmp = arr[i] + arr[i] = arr[j] + arr[j] = tmp + + +# 单元测试,编写子类继承 unittest.TestCase class TestSort(unittest.TestCase): - # 以test开头的函数会被测试 - def test_sort(self): - arr = [3, 4, 1, 5, 6] - sort(arr) - self.assertEqual(arr, [1, 3, 4, 5, 6]) - - + # 以test开头的函数会被测试 + def test_sort(self): + arr = [3, 4, 1, 5, 6] + sort(arr) + # assert 结果跟我们期待的一样 + self.assertEqual(arr, [1, 3, 4, 5, 6]) + + # mock class A(unittest.TestCase): - def m1(self): - val = self.m2() - self.m3(val) - - def m2(self): - pass - - def m3(self, val): - pass - - def test_m1(self): - a = A() - a.m2 = MagicMock(return_value = "custom_val") - a.m3 = MagicMock() - a.m1() - self.assertTrue(a.m2.called) - a.m3.assert_called_with("custom_val") - + def m1(self): + val = self.m2() + self.m3(val) + + def m2(self): + pass + + def m3(self, val): + pass + + def test_m1(self): + a = A() + a.m2 = MagicMock(return_value="custom_val") + a.m3 = MagicMock() + a.m1() + self.assertTrue(a.m2.called) # 验证 m2 被 call 过 + a.m3.assert_called_with("custom_val") # 验证 m3 被指定参数 call 过 + def side_effect(arg): - if arg < 0: - return 1 - else: - return 2 - + if arg < 0: + return 1 + else: + return 2 + if __name__ == "__main__": - unittest.main() - mock = MagicMock() - mock.side_effect = side_effect - print(mock(1)) - print(mock(-2)) - \ No newline at end of file + # unittest.main() + + mock = MagicMock() + mock.side_effect = side_effect + print(mock(1)) + print(mock(-2)) diff --git a/31/debug.py b/31/debug.py index 229c9067..cf1c02ae 100644 --- a/31/debug.py +++ b/31/debug.py @@ -7,44 +7,47 @@ def func(): - print("进入func()") - + print("进入func()") + def memoize(f): - memo = {} - def helper(x): - if x not in memo: - memo[x] = f(x) - return memo[x] - return helper + memo = {} + + def helper(x): + if x not in memo: + memo[x] = f(x) + return memo[x] + + return helper @memoize def fib(n): - if n == 0: - return 0 - elif n == 1: - return 1 - else: - return fib(n - 1) + fib(n - 2) - + if n == 0: + return 0 + elif n == 1: + return 1 + else: + return fib(n - 1) + fib(n - 2) + def fib_seq(n): - res = [] - if n> 0: - res.extend(fib_seq(n - 1)) - res.append(fib(n)) - return res + res = [] + if n> 0: + res.extend(fib_seq(n - 1)) + res.append(fib(n)) + return res if __name__ == "__main__": - a = 1 - b = 2 - # pdb.set_trace() - func() - c = 3 - print(a + b + c) - - # res = fib_seq(30) - # print(res) - cProfile.run("fib_seq(30)") \ No newline at end of file + # a = 1 + # b = 2 + # pdb.set_trace() + # func() + # c = 3 + # print(a + b + c) + + # res = fib_seq(30) + # print(res) + + cProfile.runctx("fib_seq(30)", None, locals()) diff --git a/33/closeprice.png b/33/closeprice.png index 25a7f2e9..9098eae5 100644 Binary files a/33/closeprice.png and b/33/closeprice.png differ diff --git a/33/invest.py b/33/invest.py index efa5e44b..22cb88de 100644 --- a/33/invest.py +++ b/33/invest.py @@ -8,47 +8,54 @@ import pandas as pd +########## GEMINI 行情接口 ########## +## https://api.gemini.com/v1/pubticker/:symbol + + # 获取Gemini交易所报价数据 def get_price(): - gemini_ticker = "https://api.gemini.com/v1/pubticker/{}" - symbol = "btcusd" - btc_data = requests.get(gemini_ticker.format(symbol)).json() - print(json.dumps(btc_data, indent = 4)) - + gemini_ticker = "https://api.gemini.com/v1/pubticker/{}" + symbol = "btcusd" + btc_data = requests.get(gemini_ticker.format(symbol)).json() + print(json.dumps(btc_data, indent=4)) + # 获取最近一个小时交易数据并绘图 def get_hour_price(): - # 要获取的数据时间段 - periods = "3600" - - # 抓取数据 - resp = requests.get("https://api.cryptowat.ch/markets/gemini/btcusd/ohlc", params = { - "periods" : periods - }) - data = resp.json() - - # 转换成pandas的data frame - df = pd.DataFrame( - data["result"][periods], - columns = [ - "收盘时间", - "开盘时间", - "最高价", - "最低价", - "收盘价", - "成交量", - "NA" - ] - ) - - # 输出 - print(df.head()) - ax = df["收盘价"].plot(figsize = (14, 7)) - fig = ax.get_figure() - fig.savefig("closeprice.png") + # 选择要获取的数据时间段 + periods = "3600" + + # 通过 Http 抓取 btc 历史价格数据 + resp = requests.get("https://api.cryptowat.ch/markets/gemini/btcusd/ohlc", params={ + "periods": periods + }) + data = resp.json() + + # 转换成 pandas data frame + df = pd.DataFrame( + data["result"][periods], + columns=[ + "收盘时间", + "开盘时间", + "最高价", + "最低价", + "收盘价", + "成交量", + "NA" + ] + ) + + # 输出 DataFrame 的头部几行 + print(df.head()) + + # 绘制 btc 价格曲线 + ax = df["收盘价"].plot(figsize=(14, 7)) + + # 绘制图形 + fig = ax.get_figure() + fig.savefig("closeprice.png") if __name__ == "__main__": - get_price() - get_hour_price() - \ No newline at end of file + # get_price() + get_hour_price() diff --git a/Investment/TradingMonitor/TradingMonitor/migrations/0001_initial.py b/Investment/TradingMonitor/TradingMonitor/migrations/0001_initial.py new file mode 100644 index 00000000..a2ba2449 --- /dev/null +++ b/Investment/TradingMonitor/TradingMonitor/migrations/0001_initial.py @@ -0,0 +1,23 @@ +# Generated by Django 4.1 on 2022年08月18日 12:39 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='Position', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('asset', models.CharField(max_length=10)), + ('timestamp', models.DateTimeField()), + ('amount', models.DecimalField(decimal_places=3, max_digits=10)), + ], + ), + ] diff --git a/Investment/TradingMonitor/TradingMonitor/migrations/__init__.py b/Investment/TradingMonitor/TradingMonitor/migrations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/Investment/TradingMonitor/TradingMonitor/models.py b/Investment/TradingMonitor/TradingMonitor/models.py index a2e7ecfd..5e8e1558 100644 --- a/Investment/TradingMonitor/TradingMonitor/models.py +++ b/Investment/TradingMonitor/TradingMonitor/models.py @@ -5,7 +5,6 @@ class Position(models.Model): - asset = models.CharField(max_length = 10) - timestamp = models.DataTimeField() - amount = models.DecimalField(max_digits = 10, decimal_places = 3) - \ No newline at end of file + asset = models.CharField(max_length=10) + timestamp = models.DateTimeField() + amount = models.DecimalField(max_digits=10, decimal_places=3) diff --git a/Investment/TradingMonitor/TradingMonitor/settings.py b/Investment/TradingMonitor/TradingMonitor/settings.py index bf60a986..354c2467 100644 --- a/Investment/TradingMonitor/TradingMonitor/settings.py +++ b/Investment/TradingMonitor/TradingMonitor/settings.py @@ -15,7 +15,6 @@ # Build paths inside the project like this: os.path.join(BASE_DIR, ...) BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/2.2/howto/deployment/checklist/ @@ -27,7 +26,6 @@ ALLOWED_HOSTS = [] - # Application definition INSTALLED_APPS = [ @@ -37,6 +35,7 @@ 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', + 'TradingMonitor', ] MIDDLEWARE = [ @@ -54,7 +53,7 @@ TEMPLATES = [ { 'BACKEND': 'django.template.backends.django.DjangoTemplates', - 'DIRS': [], + 'DIRS': [os.path.join(BASE_DIR, 'TradingMonitor/templates')], 'APP_DIRS': True, 'OPTIONS': { 'context_processors': [ @@ -69,7 +68,6 @@ WSGI_APPLICATION = 'TradingMonitor.wsgi.application' - # Database # https://docs.djangoproject.com/en/2.2/ref/settings/#databases @@ -80,7 +78,6 @@ } } - # Password validation # https://docs.djangoproject.com/en/2.2/ref/settings/#auth-password-validators @@ -99,7 +96,6 @@ }, ] - # Internationalization # https://docs.djangoproject.com/en/2.2/topics/i18n/ @@ -113,7 +109,6 @@ USE_TZ = True - # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/2.2/howto/static-files/ diff --git a/Investment/TradingMonitor/TradingMonitor/templates/positions.html b/Investment/TradingMonitor/TradingMonitor/templates/positions.html new file mode 100644 index 00000000..bb777bc3 --- /dev/null +++ b/Investment/TradingMonitor/TradingMonitor/templates/positions.html @@ -0,0 +1,23 @@ + + + +Positions for {{asset}} + + +
+

Positions for {{asset}}

+ + + + + + +{% for position in positions %} + + + + + +{% endfor %} +
TimeAmount
123{{position.timestamp}}{{position.asset}}
+

AltStyle によって変換されたページ (->オリジナル) /

\ No newline at end of file diff --git a/Investment/TradingMonitor/TradingMonitor/urls.py b/Investment/TradingMonitor/TradingMonitor/urls.py index 5d7984b3..7c487648 100644 --- a/Investment/TradingMonitor/TradingMonitor/urls.py +++ b/Investment/TradingMonitor/TradingMonitor/urls.py @@ -15,7 +15,9 @@ """ from django.contrib import admin from django.urls import path +from . import views urlpatterns = [ path('admin/', admin.site.urls), + path('positions/', views.render_positions) ] diff --git a/Investment/TradingMonitor/TradingMonitor/views.py b/Investment/TradingMonitor/TradingMonitor/views.py index 45ece67e..a8be9e66 100644 --- a/Investment/TradingMonitor/TradingMonitor/views.py +++ b/Investment/TradingMonitor/TradingMonitor/views.py @@ -2,11 +2,11 @@ from django.shortcuts import render -ftom .models import Position +from .models import Position def render_positions(request, asset): - positions = Position.objects.filter(asset = asset) - context = {"asset" : asset, "positions" : positions} - return render(request, "positions.html", context) - \ No newline at end of file + positions = Position.objects.filter(asset=asset) + + context = {"asset": asset, "positions": positions} + return render(request, "positions.html", context) diff --git a/Investment/TradingMonitor/db.sqlite3 b/Investment/TradingMonitor/db.sqlite3 index ca709993..70310acb 100644 Binary files a/Investment/TradingMonitor/db.sqlite3 and b/Investment/TradingMonitor/db.sqlite3 differ diff --git a/Investment/backtest.py b/Investment/backtest.py index 66e99178..6686e84b 100644 --- a/Investment/backtest.py +++ b/Investment/backtest.py @@ -12,202 +12,283 @@ def assert_msg(condition, msg): - if not condition: - raise Exception(msg) - + if not condition: + raise Exception(msg) + def read_file(filename): - # 获得文件绝对路径 - filepath = path.join(path.dirname(__file__), filename) - - # 判断文件是否存在 - assert_msg(path.exists(filepath), "文件不存在") - - # 读取csv文件并返回 - return pd.read_csv(filepath, - index_col = 0, - parse_dates = True, - infer_datetime_format = True) - - + # 获得文件绝对路径 + filepath = path.join(path.dirname(__file__), filename) + + # 判断文件是否存在 + assert_msg(path.exists(filepath), "文件不存在") + + # 读取csv文件并返回 + return pd.read_csv(filepath, index_col=0, parse_dates=True, infer_datetime_format=True) + + # 策略类 -class Strategy(metaclass = abc.ABCMeta): - """抽象策略类,用于定义交易策略""" - def __init__(self, broker, data): - self._indicators = [] - self._broker = broker - self._data = data - self._tick = 0 - - def I(self, func : Callable, *args)->np.ndarray: - value = func(*args) - value = np.asarray(value) - assert_msg(value.shape[-1] == len(self._data.Close), "指示器长度必须和data长度相同") - - self._indicators.append(value) - return value - - @property - def tick(self): - return self._tick - - @abc.abstractmethod - def init(self): - pass - - @abc.abstractmethod - def next(self, tick): - """步进函数""" - pass - - def buy(self): - self._broker.buy() - - def sell(self): - self._broker.sell() - - @property - def data(self): - return self._data +class Strategy(metaclass=abc.ABCMeta): + """ + 抽象策略类,用于定义交易策略。 + + 定义自己的策略类,需要继承这个基类,并实现两个抽象方法: + Strategy.init + Strategy.next + """ + + def __init__(self, broker, data): + """ + 构造策略对象。 + + @params broker: ExchangeAPI 交易 API 接口,用于模拟交易 + @params data: list 行情数据 + """ + + self._indicators = [] + self._broker = broker # type: _Broker + self._data = data # type: _Data + self._tick = 0 + + def I(self, func: Callable, *args) -> np.ndarray: + """ + 计算买卖指标向量。买卖指标向量是一个数组,长度和历史数据对应; + 用于判定这个时间点上需要进行 " 买 " 还是 " 卖 "。 + + 例如计算滑动平均: + def init(): + self.sma = self.I(utils.SMA, self.data.Close, N) + """ + + value = func(*args) + value = np.asarray(value) + assert_msg(value.shape[-1] == len(self._data.Close), "指示器长度必须和data长度相同") + + self._indicators.append(value) + return value + + @property + def tick(self): + return self._tick + + @abc.abstractmethod + def init(self): + """ + 初始化策略。在策略回测 / 执行过程中调用一次,用于初始化策略内部状态。 + 这里也可以预计算策略的辅助参数。比如根据历史行情数据: + 计算买卖的指示器向量; + 训练模型 / 初始化模型参数 + """ + pass + + @abc.abstractmethod + def next(self, tick): + """ + 步进函数,执行第 tick 步的策略。 + tick 代表当前的 " 时间 "。比如 data[tick] 用于访问当前的市场价格。 + """ + pass + + def buy(self): + self._broker.buy() + + def sell(self): + self._broker.sell() + + @property + def data(self): + return self._data class SmaCross(Strategy): - fast = 10 - slow = 20 - - def init(self): - # 计算每个时刻的快线和慢线 - self.sma1 = self.I(SMA, self.data.Close, self.fast) - self.sma2 = self.I(SMA, self.data.Close, self.slow) - - def next(self, tick): - # 快线越过慢线,买入 - if crossover(self.sma1[:tick], self.sma2[:tick]): - self.buy() - # 慢线越过快线,卖出 - elif crossover(self.sma2[:tick], self.sma1[:tick]): - self.sell() - else: - pass - - + # 小窗口 SMA 的窗口大小,用于计算 SMA 快线 + fast = 30 + + # 大窗口 SMA 的窗口大小,用于计算 SMA 慢线 + slow = 90 + + def init(self): + # 计算历史上每个时刻的快线和慢线 + self.sma1 = self.I(SMA, self.data.Close, self.fast) + self.sma2 = self.I(SMA, self.data.Close, self.slow) + + def next(self, tick): + # 快线越过慢线,买入 + if crossover(self.sma1[:tick], self.sma2[:tick]): + self.buy() + + # 慢线越过快线,卖出 + elif crossover(self.sma2[:tick], self.sma1[:tick]): + self.sell() + + # 否则,这个时刻不执行任何操作 + else: + pass + + # 交易所类 class ExchangeAPI: - def __init__(self, data, cash, commission): - assert_msg(0 < cash, "初始现金数量需大于0,输入初始金额为{}".format(cash)) - assert_msg(0 <= commission <= 0.05, "合理手续费率不大于5%,输入的为{}".format(commission)) - self._initial_cash = cash - self._data = data - self._commission = commission - self._position = 0 - self._cash = cash - self._i = 0 - - @property - def cash(self): - return self._cash - - @property - def position(self): - return self._position - - @property - def initial_cash(self): - return self._initial_cash - - @property - def market_value(self): - return self._cash + self._position * self.current_price - - @property - def current_price(self): - return self._data.Close[self._i] - - def buy(self): - """用当前账户余额,全部按市价买入""" - self._position = float(self._cash / (self.current_price * (1 + self._commission))) - self._cash = 0.0 - - def sell(self): - """卖出当前账户所有持仓""" - self._cash += float(self._position * self.current_price * (1 - self._commission)) - self._position = 0.0 - - def next(self, tick): - self._i = tick + def __init__(self, data, cash, commission): + assert_msg(0 < cash, "初始现金数量需大于0,输入初始金额为{}".format(cash)) + assert_msg(0 <= commission <= 0.05, "合理手续费率不大于5%,输入的为{}".format(commission)) + self._initial_cash = cash + self._data = data + self._commission = commission + self._position = 0 + self._cash = cash + self._i = 0 + + @property + def cash(self): + """ + :return: 返回当前账户现金数量 + """ + return self._cash + + @property + def position(self): + """ + :return: 返回当前账户仓位 + """ + return self._position + + @property + def initial_cash(self): + """ + :return: 返回初始现金数量 + """ + return self._initial_cash + + @property + def market_value(self): + """ + :return: 返回当前市值 + """ + return self._cash + self._position * self.current_price + + @property + def current_price(self): + """ + :return: 返回当前市场价格 + """ + return self._data.Close[self._i] + + def buy(self): + """ + 用当前账户剩余资金,按照市场价格全部买入 + """ + self._position = float(self._cash * (1 - self._commission) / self.current_price) + self._cash = 0.0 + + def sell(self): + """ + 卖出当前账户剩余持仓 + """ + self._cash += float(self._position * self.current_price * (1 - self._commission)) + self._position = 0.0 + + def next(self, tick): + self._i = tick class Backtest: - """ - 回测类,用于读取历史行情数据,执行策略,模拟交易并估计收益。 - 调用run成员函数来执行。 - """ - def __init__(self, - data : pd.DataFrame, - strategy_type : type(Strategy), - broker_type : type(ExchangeAPI), - cash : float = 10000, - commission : float = .0): - assert_msg(issubclass(strategy_type, Strategy), "strategy_type不是一个Stragegy类型") - assert_msg(issubclass(broker_type, ExchangeAPI), "broker_type不是一个ExchangeAPI类型") - assert_msg(isinstance(commission, Number), "commission不是浮点数值类型") - - data = data.copy(False) - - # 如果没有volume列,填充Nan - if "Volume" not in data: - data["Volume"] = np.Nan - + """ + 回测类,用于读取历史行情数据,执行策略,模拟交易并估计收益。 + + 初始化的时候调用 Backtest.run 来执行回测。 + """ + + def __init__(self, + data: pd.DataFrame, + strategy_type: type(Strategy), + broker_type: type(ExchangeAPI), + cash: float = 10000, + commission: float = .0): + + """ + 构造回测对象。需要的参数包括:历史数据,策略对象,初始资金数量,手续费率等。 + 初始化过程包括检测输入类型,填充数据空值等。 + + 参数: + :param data: pd.DataFrame pandas Dataframe 格式的历史 OHLCV 数据 + :param strategy_type: type(Strategy) 策略类型 + :param broker_type: type(ExchangeAPI) 交易所 API 类型,负责执行买卖操作以及账户状态的维护 + :param cash: float 初始资金数量 + :param commission: float 每次交易手续费率。如 2% 的手续费此处为 0.02 + """ + + assert_msg(issubclass(strategy_type, Strategy), "strategy_type不是一个Strategy类型") + assert_msg(issubclass(broker_type, ExchangeAPI), "broker_type不是一个ExchangeAPI类型") + assert_msg(isinstance(commission, Number), "commission不是浮点数值类型") + + # False代表浅复制,没有内存拷贝 + data = data.copy(False) + + # 如果没有Volume列,填充NaN + if "Volume" not in data: + data["Volume"] = np.Nan + # 验证OHLC数据格式 - assert_msg(len(data.columns & {"Open", "High", "Low", "Close", "Volume"}) == 5, "输入data格式不正确,至少要包括五列") - + assert_msg(len(data.columns & {'Open', 'High', 'Low', 'Close', 'Volume'}) == 5, + ("输入的`data`格式不正确,至少需要包含这些列:" + "'Open', 'High', 'Low', 'Close'")) # 检查缺失值 - assert_msg(not data[["Open", "High", "Low", "Close", "Volume"]].max().isnull().any(), "部分数据包含缺失值") - - # 如果数据没有排序,就排序 - if not data.index.is_monotonic_increasing: - data = data.sort_index() - - # 利用数据,初始化交易所对象和策略对象 - self._data = data - self._broker = broker_type(data, cash, commission) - self._strategy = strategy_type(self._broker, self._data) - self._result = None - - def run(self) -> pd.Series: - """运行回测""" - strategy = self._strategy - broker = self._broker - # 策略初始化 - strategy.init() - - # 设定回测开始和结束位置 - start = 100 - end = len(self._data) - - # 回测主循环,更新市场状态,执行策略 - for i in range(start, end): - # 先把市场状态移动到第i时刻,然后执行策略 - broker.next(i) - strategy.next(i) - - # 执行完策略,计算并返回结果 - self._results = self._compute_result(broker) - return self._results - - def _compute_result(self, broker): - s = pd.Series() - s["初始市值"] = broker.initial_cash - s["结束市值"] = broker.market_value - s["收益"] = broker.market_value - broker.initial_cash - return s - + assert_msg(not data[['Open', 'High', 'Low', 'Close']].max().isnull().any(), + ('部分OHLC包含缺失值,请去掉那些行或者通过差值填充. ')) + + # 如果行情数据没有按照时间排序,重新排序一下 + if not data.index.is_monotonic_increasing: + data = data.sort_index() + + # 利用数据,初始化交易所对象和策略对象 + self._data = data # type: pd.DataFrame + self._broker = broker_type(data, cash, commission) + self._strategy = strategy_type(self._broker, self._data) + self._results = None + + def run(self) -> pd.Series: + + """ + 运行回测,迭代历史数据,执行模拟交易并返回回测结果。 + + Run the backtest. Returns `pd.Series` with results and statistics. + + Keyword arguments are interpreted as strategy parameters. + """ + + strategy = self._strategy + broker = self._broker + + # 策略初始化 + strategy.init() + + # 设定回测开始和结束位置 + start = 100 + end = len(self._data) + + # 回测主循环,更新市场状态,执行策略 + for i in range(start, end): + # 注意要先把市场状态移动到第 i 时刻,然后再执行策略。 + broker.next(i) + strategy.next(i) + + # 完成策略执行之后,计算结果并返回 + self._results = self._compute_result(broker) + return self._results + + def _compute_result(self, broker): + s = pd.Series() + s["初始市值"] = broker.initial_cash + s["结束市值"] = broker.market_value + s["收益"] = broker.market_value - broker.initial_cash + return s + if __name__ == "__main__": - BTCUSD = read_file("BTCUSD_GEMINI.csv") - assert_msg(BTCUSD.__len__()> 0, "读取失败") - print(BTCUSD.head()) - - ret = Backtest(BTCUSD, SmaCross, ExchangeAPI, 10000.0, 0.03).run() - print(ret) - \ No newline at end of file + # BTCUSD = read_file("BTCUSD_GEMINI.csv") + # assert_msg(BTCUSD.__len__()> 0, "读取失败") + # print(BTCUSD.head()) + + BTCUSD = read_file("BTCUSD_GEMINI.csv") + ret = Backtest(BTCUSD, SmaCross, ExchangeAPI, 10000.0, 0.003).run() + print(ret) diff --git a/Investment/datacatch.py b/Investment/datacatch.py index d307232b..bbd795bb 100644 --- a/Investment/datacatch.py +++ b/Investment/datacatch.py @@ -4,68 +4,72 @@ import requests import timeit +import time import websocket -import threading +import _thread as thread import ssl import json # 获取报价 def get_orderbook(): - orderbook = requests.get("https://api.gemini.com/v1/book/btcusd").json() - - + orderbook = requests.get("https://api.gemini.com/v1/book/btcusd").json() + + # WebSocket测试 # 在接收到服务器发送的消息时调用 def on_message(ws, message): - print("接受到:" + message) - with open("BTCUSD.txt", "a+") as f: - print(message) - f.write(json.dumps(message) + "/n") - + print("接受到:" + message) + # with open("BTCUSD.txt", "a+") as f: + # print(message) + # f.write(json.dumps(message) + "/n") + -# 在和服务器建立完全连接时调用 +# 在和服务器建立完成连接时调用 def on_open(ws): - # 线程运行函数 - def gao(): - # 往服务器发送0-4,每次发送完休息0.1秒 - for i in range(5): - time.sleep(0.1) - msg = "{0}".format(i) - ws.send(msg) - print("发送了:" + msg) - # 休息1秒用于接受服务器回复的消息 - time.sleep(1) - - # 关闭websocket连接 - ws.close() - print("websocket关闭了") - - # 在另一个线程运行gao函数 - threading.start_new_thread(gao, ()) + # 线程运行函数 + def gao(): + # 往服务器发送0-4,每次发送完休息0.1秒 + for i in range(5): + time.sleep(0.1) + msg = "{0}".format(i) + ws.send(msg) + print("发送了:" + msg) + # 休息1秒用于接受服务器回复的消息 + time.sleep(1) -# 获取报价 + # 关闭websocket连接 + ws.close() + print("websocket关闭了") + + # 在另一个线程运行gao函数 + thread.start_new_thread(gao, ()) + + +# 全局计数器 count = 5 + def on_mes(ws, message): - global count - print(message) - count -= 1 - print("count={}".format(count)) - # 接受五次消息后关闭 - if count == 0: - ws.close() + global count + print(message) + count -= 1 + print("count={}".format(count)) + # 接收了 5 次消息之后关闭 websocket 连接 + if count == 0: + ws.close() if __name__ == "__main__": - ''' - n = 10 - latency = timeit.timeit("get_orderbook()", setup = "from __main__ import get_orderbook", number = n) - print("Latency is {} ms.".format(latency * 1000)) - ''' - # ws = websocket.WebSocketApp("ws://echo.websocket.org", on_message = on_message, on_open = on_open) - # ws.run_forever() - ws = websocket.WebSocketApp("wss://api.gemini.com/v1/marketdata/btcusd?top_of_book=true&offers=true", on_message = on_message) - ws.run_forever(sslopt={"cert_reqs" : ssl.CERT_NONE}) - \ No newline at end of file + # n = 10 + # # latency = timeit.timeit("get_orderbook()", setup="from __main__ import get_orderbook", number=n) * 1.0 / n + # latency = timeit.timeit(stmt=get_orderbook, number=n) * 1.0 / n + # print("Latency is {} ms.".format(latency * 1000)) + + # ws = websocket.WebSocketApp("wss://echo.websocket.events/", on_message=on_message, on_open=on_open) + # ws.run_forever() + + ws = websocket.WebSocketApp("wss://api.gemini.com/v1/marketdata/btcusd?top_of_book=true&offers=true", + on_message=on_mes) + ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE}) diff --git a/Investment/do.py b/Investment/do.py index 83ff4200..88a42fe9 100644 --- a/Investment/do.py +++ b/Investment/do.py @@ -10,46 +10,43 @@ import datetime import time - if __name__ == "__main__": - base_url = "https://api.sandbox.gemini.com" - endpoint = "/v1/order/new" - url = base_url + endpoint - - gemini_api_key = "account-PsnWZPOnOf0FatEh3pWj" - gemini_api_secret = "49wC5psfvDhMmtKaNzmmVg42yYYb" - - t = datetime.datetime.now() - payload_nonce = str(int(time.mktime(t.timetuple()) * 1000)) - - payload = { - "request" : "/v1/order/new", - "nonce" : payload_nonce, - "symbol" : "btcusd", - "amount" : "5", - "price" : "7428.00", - "side" : "buy", - "type" : "exchange limit", - "options" : ["maker-or-cancel"] - } - - encoded_payload = json.dumps(payload).encode() - b64 = base64.b64encode(encoded_payload) - signature = hmac.new(bytes(gemini_api_secret, "latin-1"), b64, hashlib.sha384).hexdigest() - print(signature) - - requests_header = { - "Content-Type" : "text/plain", - "Content-Length" : "0", - "X-GEMINI-APIKEY" : gemini_api_key, - "X-GEMINI-PAYLOAD" : b64, - "X-GEMINI-SIGNATURE" : signature, - "Cache-Control" : "no-cache" - } - - response = requests.post(url, - data = None, - headers = requests_header) - new_order = response.json() - print(new_order) - \ No newline at end of file + base_url = "https://api.sandbox.gemini.com" + endpoint = "/v1/order/new" + url = base_url + endpoint + + gemini_api_key = "account-IZvU5Qvf8IrILPFw33yI" + gemini_api_secret = "UjDoXzvkTTfuGV6eThN35GpuqBF" + + t = datetime.datetime.now() + payload_nonce = str(int(time.mktime(t.timetuple()) * 1000)) + + # 下单操作信息 + payload = { + "request": "/v1/order/new", + "nonce": payload_nonce, + "symbol": "btcusd", + "amount": "2", + "price": "23840.00", + "side": "buy", + "type": "exchange limit", + "options": ["maker-or-cancel"] + } + encoded_payload = json.dumps(payload).encode() + b64 = base64.b64encode(encoded_payload) + + signature = hmac.new(bytes(gemini_api_secret, "latin-1"), b64, hashlib.sha384).hexdigest() + print(signature) + + requests_header = { + "Content-Type": "text/plain", + "Content-Length": "0", + "X-GEMINI-APIKEY": gemini_api_key, + "X-GEMINI-PAYLOAD": b64, + "X-GEMINI-SIGNATURE": signature, + "Cache-Control": "no-cache" + } + + response = requests.post(url, data=None, headers=requests_header) + new_order = response.json() + print(new_order) diff --git a/Investment/info_post.py b/Investment/info_post.py index 5a352766..b7e31015 100644 --- a/Investment/info_post.py +++ b/Investment/info_post.py @@ -7,19 +7,18 @@ def run(): - context = zmq.Context() - socket = context.socket(zmq.PUB) - socket.bind("tcp://*:6666") - socket.setsockopt_string(zmq.SUBSCRIBE, '') - - cnt = 1 - - while True: - time.sleep(1) - socket.send_string("server cnt {}".format(cnt)) - print("send {}".format(cnt)) - cnt += 1 - - + context = zmq.Context() + socket = context.socket(zmq.PUB) + socket.bind("tcp://*:6666") + + cnt = 1 + + while True: + time.sleep(1) + socket.send_string("server cnt {}".format(cnt)) + print("send {}".format(cnt)) + cnt += 1 + + if __name__ == "__main__": - run() + run() diff --git a/Investment/info_queue.py b/Investment/info_queue.py index 264ca0dc..2ab8c786 100644 --- a/Investment/info_queue.py +++ b/Investment/info_queue.py @@ -6,16 +6,16 @@ def run(): - context = zmq.Context() - socket = context.socket(zmq.SUB) - socket.connect("tcp://127.0.0.1:6666") - socket.setsockopt_string(zmq.SUBSCRIBE, '') - - print("client 1") - while True: - msg = socket.recv() - print("msg:%s" % msg) - - + context = zmq.Context() + socket = context.socket(zmq.SUB) + socket.connect("tcp://127.0.0.1:6666") + socket.setsockopt_string(zmq.SUBSCRIBE, '') + + print("client 1") + while True: + msg = socket.recv() + print("msg:%s" % msg) + + if __name__ == "__main__": - run() + run() diff --git a/Investment/info_queue2.py b/Investment/info_queue2.py index f2c08808..6b963f1c 100644 --- a/Investment/info_queue2.py +++ b/Investment/info_queue2.py @@ -6,16 +6,16 @@ def run(): - context = zmq.Context() - socket = context.socket(zmq.SUB) - socket.connect("tcp://127.0.0.1:6666") - socket.setsockopt_string(zmq.SUBSCRIBE, '') - - print("client 2") - while True: - msg = socket.recv() - print("msg:%s" % msg) - - + context = zmq.Context() + socket = context.socket(zmq.SUB) + socket.connect("tcp://127.0.0.1:6666") + socket.setsockopt_string(zmq.SUBSCRIBE, '') + + print("client 2") + while True: + msg = socket.recv() + print("msg:%s" % msg) + + if __name__ == "__main__": - run() + run() diff --git a/Investment/orderbook.py b/Investment/orderbook.py index 7d44c931..aec22611 100644 --- a/Investment/orderbook.py +++ b/Investment/orderbook.py @@ -10,82 +10,86 @@ class OrderBook(object): - - BIDS = "bid" - ASKS = "ask" - - def __init__(self, limit = 20): - self.limit = limit - - # (price, mount) - self.bids = {} - self.asks = {} - - self.bids_sorted = [] - self.asks_sorted = [] - - def insert(self, price, amount, direction): - if direction == self.BIDS: - if amount == 0: - if price in self.bids: - del self.bids[price] - else: - self.bids[price] = amount - elif direction == self.ASKS: - if amount == 0: - if price in self.asks: - del self.asks[price] - else: - self.asks[price] = amount - else: - print("错误:未知的方向{}".format(direction)) - - def sort_and_truncate(self): - # sort - self.bids_sorted = sorted([(price, amount) for price, amount in self.bids.items()], reverse = True) - self.asks_sorted = sorted([(price, amount) for price, amount in self.asks.items()]) - - # truncate - self.bids_sorted = self.bids_sorted[:self.limit] - self.asks_sorted = self.asks_sorted[:self.limit] - - # copy back - self.bids = dict(self.bids_sorted) - self.asks = dict(self.asks_sorted) - - def get_copy_of_bids_and_asks(self): - return copy.deepcopy(self.bids_sorted, copy.deepcopy(self.asks_sorted)) + BIDS = "bid" + ASKS = "ask" + + def __init__(self, limit=20): + self.limit = limit + + # (price, mount) + self.bids = {} # 买方挂单 + self.asks = {} # 卖方挂单 + + self.bids_sorted = [] + self.asks_sorted = [] + + def insert(self, price, amount, direction): + if direction == self.BIDS: + if amount == 0: + if price in self.bids: + del self.bids[price] + else: + self.bids[price] = amount + elif direction == self.ASKS: + if amount == 0: + if price in self.asks: + del self.asks[price] + else: + self.asks[price] = amount + else: + print("错误:未知的方向{}".format(direction)) + + def sort_and_truncate(self): + # sort + self.bids_sorted = sorted([(price, amount) for price, amount in self.bids.items()], reverse=True) + self.asks_sorted = sorted([(price, amount) for price, amount in self.asks.items()]) + + # truncate + self.bids_sorted = self.bids_sorted[:self.limit] + self.asks_sorted = self.asks_sorted[:self.limit] + + # copy back to bids and asks + self.bids = dict(self.bids_sorted) + self.asks = dict(self.asks_sorted) + + def get_copy_of_bids_and_asks(self): + return copy.deepcopy(self.bids_sorted), copy.deepcopy(self.asks_sorted) class Crawler: - def __init__(self, symbol, output_file): - self.orderbook = OrderBook(limit = 10) - self.output_file = output_file - - self.ws = websocket.WebSocketApp("wss://api.gemini.com/v1/marketdata/{}".format(symbol), on_message = lambda ws, message : self.on_message(message)) - self.ws.run_forever(sslopt = {"cert_reqs" : ssl.CERT_NONE}) - - def on_message(self, message): - data = json.loads(message) - print(data) - for event in data["events"]: - price, amount, direction = float(event["price"]), float(event["remaining"]), event["side"] - self.orderbook.insert(price, amount, direction) - - self.orderbook.sort_and_truncate() - - # 输出到文件 - with open(self.output_file, "a+") as f: - bids, asks = self.orderbook.get_copy_of_bids_and_asks() - output = { - "bids" : bids, - "asks" : asks, - "ts" : int(time.time() * 1000) - } - print(json.dump(output)) - f.write(json.dump(output) + "\n") + def __init__(self, symbol, output_file): + self.orderbook = OrderBook(limit=10) + self.output_file = output_file + + self.ws = websocket.WebSocketApp("wss://api.gemini.com/v1/marketdata/{}".format(symbol), + on_message=lambda ws, message: self.on_message(message)) + self.ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE}) + + def on_message(self, message): + # 对收到的信息进行处理,然后送给 orderbook + data = json.loads(message) + print(data) + for event in data["events"]: + price, amount, direction = float(event["price"]), float(event["remaining"]), event["side"] + self.orderbook.insert(price, amount, direction) + + # 整理 orderbook,排序,只选取我们需要的前几个 + self.orderbook.sort_and_truncate() + + # 输出到文件 + with open(self.output_file, "a+") as f: + try: + bids, asks = self.orderbook.get_copy_of_bids_and_asks() + output = { + "bids": bids, + "asks": asks, + "ts": int(time.time() * 1000) + } + print(json.dumps(output)) + f.write(json.dumps(output) + "\n") + except Exception as err: + print("异常:{}".format(err)) if __name__ == "__main__": - crawler = Crawler(symbol = "BTCUSD", output_file = "BTCUSD.txt") - \ No newline at end of file + crawler = Crawler(symbol="BTCUSD", output_file="BTCUSD.txt") diff --git a/Investment/ultil.py b/Investment/ultil.py index 1f25e6c9..e49dc861 100644 --- a/Investment/ultil.py +++ b/Investment/ultil.py @@ -6,11 +6,17 @@ def SMA(values, n): - """返回简单滑动平均""" - return pd.Series(values).rolling(n).mean() - - -def crossover(series1, series2): - """检查两个序列是否在结尾交叉""" - return series1[-2] < series2[-2] and series1[-1]> series1[-1] - \ No newline at end of file + """ + 返回简单滑动平均 + """ + return pd.Series(values).rolling(n).mean() + + +def crossover(series1, series2) -> bool: + """ + 检查两个序列是否在结尾交叉 + :param series1: 序列1 + :param series2: 序列2 + :return: 如果交叉返回True,反之False + """ + return series1[-2] < series2[-2] and series1[-1]> series2[-1]