fix covid_stats_via_xpath.py (#12975)

lighting9999 · pre-commit-ci[bot] · web-flow · commit 4ec71a303b47 · 2025-09-18T15:55:59.000+02:00
* fix covid_stats_via_xpath.py
  Improve error handling.
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* fix covid_stats_via_xpath.py typo
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* fix ruff
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* upgrade covid_stats_via_xpath.py
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* Update and fix covid_stats_via_xpath.py

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
diff --git a/web_programming/covid_stats_via_xpath.py b/web_programming/covid_stats_via_xpath.py
@@ -1,7 +1,8 @@
 """
-This is to show simple COVID19 info fetching from worldometers archive site using lxml
-* The main motivation to use lxml in place of bs4 is that it is faster and therefore
-more convenient to use in Python web projects (e.g. Django or Flask-based)
+This script demonstrates fetching simple COVID-19 statistics from the
+Worldometers archive site using lxml. lxml is chosen over BeautifulSoup
+for its speed and convenience in Python web projects (such as Django or
+Flask).
 """
 
 # /// script
@@ -25,15 +26,34 @@ class CovidData(NamedTuple):
 
 
 def covid_stats(
- url: str = "https://web.archive.org/web/20250825095350/https://www.worldometers.info/coronavirus/",
+ url: str = (
+ "https://web.archive.org/web/20250825095350/"
+ "https://www.worldometers.info/coronavirus/"
+ ),
 ) -> CovidData:
 xpath_str = '//div[@class = "maincounter-number"]/span/text()'
- return CovidData(
- *html.fromstring(httpx.get(url, timeout=10).content).xpath(xpath_str)
+ try:
+ response = httpx.get(url, timeout=10).raise_for_status()
+ except httpx.TimeoutException:
+ print(
+ "Request timed out. Please check your network connection "
+ "or try again later."
+ )
+ return CovidData("N/A", "N/A", "N/A")
+ except httpx.HTTPStatusError as e:
 + print(f"HTTP error occurred: {e}")
+ return CovidData("N/A", "N/A", "N/A")
+ data = html.fromstring(response.content).xpath(xpath_str)
+ if len(data) != 3:
+ print("Unexpected data format. The page structure may have changed.")
+ data = "N/A", "N/A", "N/A"
+ return CovidData(*data)
+
+
+if __name__ == "__main__":
+ fmt = (
+ "Total COVID-19 cases in the world:{}\n"
+ "Total deaths due to COVID-19 in the world:{}\n"
+ "Total COVID-19 patients recovered in the world:{}"
 )
-
-
-fmt = """Total COVID-19 cases in the world:{}
-Total deaths due to COVID-19 in the world:{}
-Total COVID-19 patients recovered in the world:{}"""
-print(fmt.format(*covid_stats()))
+ print(fmt.format(*covid_stats()))

Original file line number | Diff line change
@@ -1,7 +1,8 @@
 """
 -This is to show simple COVID19 info fetching from worldometers archive site using lxml
 -* The main motivation to use lxml in place of bs4 is that it is faster and therefore
 -more convenient to use in Python web projects (e.g. Django or Flask-based)
 +This script demonstrates fetching simple COVID-19 statistics from the
 +Worldometers archive site using lxml. lxml is chosen over BeautifulSoup
 +for its speed and convenience in Python web projects (such as Django or
 +Flask).
 """
 # /// script
 @@ -25,15 +26,34 @@ class CovidData(NamedTuple):
 def covid_stats(
 -url: str = "https://web.archive.org/web/20250825095350/https://www.worldometers.info/coronavirus/",
 +url: str = (
 +"https://web.archive.org/web/20250825095350/"
 +"https://www.worldometers.info/coronavirus/"
 +),
 ) -> CovidData:
 xpath_str = '//div[@class = "maincounter-number"]/span/text()'
 -return CovidData(
 -*html.fromstring(httpx.get(url, timeout=10).content).xpath(xpath_str)
 +try:
 +response = httpx.get(url, timeout=10).raise_for_status()
 +except httpx.TimeoutException:
 +print(
 +"Request timed out. Please check your network connection "
 +"or try again later."
 +)
 +return CovidData("N/A", "N/A", "N/A")
 +except httpx.HTTPStatusError as e:
 +print(f"HTTP error occurred: {e}")
 +return CovidData("N/A", "N/A", "N/A")
 +data = html.fromstring(response.content).xpath(xpath_str)
 +if len(data) != 3:
 +print("Unexpected data format. The page structure may have changed.")
 +data = "N/A", "N/A", "N/A"
 +return CovidData(*data)
 +
 +
 +if __name__ == "__main__":
 +fmt = (
 +"Total COVID-19 cases in the world:{}\n"
 +"Total deaths due to COVID-19 in the world:{}\n"
 +"Total COVID-19 patients recovered in the world:{}"
  )
 -
 -
 -fmt = """Total COVID-19 cases in the world:{}
 -Total deaths due to COVID-19 in the world:{}
 -Total COVID-19 patients recovered in the world:{}"""
 -print(fmt.format(*covid_stats()))
 +print(fmt.format(*covid_stats()))