From 7326cf9c9a666568d5a209d3e6364a1de080bc90 Mon Sep 17 00:00:00 2001 From: "joey.wang" Date: Fri, 1 Mar 2024 15:17:08 +0800 Subject: [PATCH] wck update FileStorageTest --- demo1.py | 11 +++--- demo1_wck.py | 28 +++++++++++++++ demo2.py | 4 ++- demo3.py | 2 +- demo4.py | 2 +- demo5.py | 10 +++--- movies.txt | 98 ++++++++++++++++++++++++++-------------------------- 7 files changed, 93 insertions(+), 62 deletions(-) create mode 100644 demo1_wck.py diff --git a/demo1.py b/demo1.py index 027bca5..5abe3ad 100644 --- a/demo1.py +++ b/demo1.py @@ -3,6 +3,7 @@ import re url = 'https://static1.scrape.center/' +url = 'https://ssr1.scrape.center/' html = requests.get(url).text doc = pq(html) items = doc('.el-card').items() @@ -27,8 +28,8 @@ file.close() -with open('movies.txt', 'w', encoding='utf-8'): - file.write(f'名称: {name}\n') - file.write(f'类别: {categories}\n') - file.write(f'上映时间: {published_at}\n') - file.write(f'评分: {score}\n') \ No newline at end of file +# with open('movies.txt', 'w', encoding='utf-8') as file: +# file.write(f'名称: {name}\n') +# file.write(f'类别: {categories}\n') +# file.write(f'上映时间: {published_at}\n') +# file.write(f'评分: {score}\n') diff --git a/demo1_wck.py b/demo1_wck.py new file mode 100644 index 0000000..441b5a8 --- /dev/null +++ b/demo1_wck.py @@ -0,0 +1,28 @@ +import requests +from pyquery import PyQuery as pq +import re + +url = 'https://static1.scrape.center/' +url = 'https://ssr1.scrape.center/' +html = requests.get(url).text +doc = pq(html) +items = doc('.el-card').items() + +file = open('movies.txt', 'w', encoding='utf-8') +for item in items: + # 名称 + name = item.find('a > h2').text() + file.write(f"名称:{name}\n") + # 类别 + categories = [item.text() for item in item.find('.categories button span').items()] + file.write(f"类别:{categories}\n") + # 上映时间 + published_at = item.find('.info:contains(上映)').text() + published_at = re.search('(\d{4}-\d{2}-\d{2})', published_at).group(1) \ + if published_at and re.search('(\d{4}-\d{2}-\d{2})', published_at) else None + file.write(f"上映时间:{published_at}\n") + # 评分 + score = item.find('p.score').text() + file.write(f"评分:{score}\n") + file.write(f'{"=" * 50}\n') +file.close() diff --git a/demo2.py b/demo2.py index 8c4f1ff..763cb4d 100644 --- a/demo2.py +++ b/demo2.py @@ -14,4 +14,6 @@ print(type(str)) data = json.loads(str) print(data) -print(type(data)) \ No newline at end of file +print(type(data)) +print(data[0]["name"]) +print(data[0].get("name")) diff --git a/demo3.py b/demo3.py index e5cef61..de5a147 100644 --- a/demo3.py +++ b/demo3.py @@ -6,4 +6,4 @@ print(data) data = json.load(open('data.json', encoding='utf-8')) -print(data) \ No newline at end of file +print(data) diff --git a/demo4.py b/demo4.py index eac4195..878f178 100644 --- a/demo4.py +++ b/demo4.py @@ -9,4 +9,4 @@ file.write(json.dumps(data)) with open('data.json', 'w', encoding='utf-8') as file: - file.write(json.dumps(data, indent=2)) \ No newline at end of file + file.write(json.dumps(data, indent=2)) diff --git a/demo5.py b/demo5.py index 8ef4da2..4b1aee2 100644 --- a/demo5.py +++ b/demo5.py @@ -6,10 +6,10 @@ 'birthday': '1992-10-18' }] -with open('data.json', 'w', encoding='utf-8') as file: - file.write(json.dumps(data, indent=2)) - +# with open('data.json', 'w', encoding='utf-8') as file: +# file.write(json.dumps(data, indent=2)) + with open('data.json', 'w', encoding='utf-8') as file: file.write(json.dumps(data, indent=2, ensure_ascii=False)) - -json.dump(data, open('data.json', 'w', encoding='utf-8'), indent=2, ensure_ascii=False) \ No newline at end of file + +json.dump(data, open('data.json', 'w', encoding='utf-8'), indent=2, ensure_ascii=False) diff --git a/movies.txt b/movies.txt index ed8f945..5d4b4bb 100644 --- a/movies.txt +++ b/movies.txt @@ -1,50 +1,50 @@ -名称: 霸王别姬 - Farewell My Concubine -类别: ['剧情', '爱情'] -上映时间: 1993-07-26 -评分: 9.5 -================================================== -名称: 这个杀手不太冷 - Léon -类别: ['剧情', '动作', '犯罪'] -上映时间: 1994-09-14 -评分: 9.5 -================================================== -名称: 肖申克的救赎 - The Shawshank Redemption -类别: ['剧情', '犯罪'] -上映时间: 1994-09-10 -评分: 9.5 -================================================== -名称: 泰坦尼克号 - Titanic -类别: ['剧情', '爱情', '灾难'] -上映时间: 1998-04-03 -评分: 9.5 -================================================== -名称: 罗马假日 - Roman Holiday -类别: ['剧情', '喜剧', '爱情'] -上映时间: 1953-08-20 -评分: 9.5 -================================================== -名称: 唐伯虎点秋香 - Flirting Scholar -类别: ['喜剧', '爱情', '古装'] -上映时间: 1993-07-01 -评分: 9.5 -================================================== -名称: 乱世佳人 - Gone with the Wind -类别: ['剧情', '爱情', '历史', '战争'] -上映时间: 1939-12-15 -评分: 9.5 -================================================== -名称: 喜剧之王 - The King of Comedy -类别: ['剧情', '喜剧', '爱情'] -上映时间: 1999-02-13 -评分: 9.5 -================================================== -名称: 楚门的世界 - The Truman Show -类别: ['剧情', '科幻'] -上映时间: None -评分: 9.0 -================================================== -名称: 狮子王 - The Lion King -类别: ['动画', '歌舞', '冒险'] -上映时间: 1995-07-15 -评分: 9.0 +名称:霸王别姬 - Farewell My Concubine +类别:['剧情', '爱情'] +上映时间:1993-07-26 +评分:9.5 +================================================== +名称:这个杀手不太冷 - Léon +类别:['剧情', '动作', '犯罪'] +上映时间:1994-09-14 +评分:9.5 +================================================== +名称:肖申克的救赎 - The Shawshank Redemption +类别:['剧情', '犯罪'] +上映时间:1994-09-10 +评分:9.5 +================================================== +名称:泰坦尼克号 - Titanic +类别:['剧情', '爱情', '灾难'] +上映时间:1998-04-03 +评分:9.5 +================================================== +名称:罗马假日 - Roman Holiday +类别:['剧情', '喜剧', '爱情'] +上映时间:1953-08-20 +评分:9.5 +================================================== +名称:唐伯虎点秋香 - Flirting Scholar +类别:['喜剧', '爱情', '古装'] +上映时间:1993-07-01 +评分:9.5 +================================================== +名称:乱世佳人 - Gone with the Wind +类别:['剧情', '爱情', '历史', '战争'] +上映时间:1939-12-15 +评分:9.5 +================================================== +名称:喜剧之王 - The King of Comedy +类别:['剧情', '喜剧', '爱情'] +上映时间:1999-02-13 +评分:9.5 +================================================== +名称:楚门的世界 - The Truman Show +类别:['剧情', '科幻'] +上映时间:None +评分:9.0 +================================================== +名称:狮子王 - The Lion King +类别:['动画', '歌舞', '冒险'] +上映时间:1995-07-15 +评分:9.0 ==================================================