Add parser to scrape winning candidates from VEC site.

2024-11-13 07:24:05 +11:00
parent af457dbd8c
commit 464d617ecc
2 changed files with 29 additions and 0 deletions
--- a/results/parser.py
+++ b/results/parser.py
@@ -0,0 +1,24 @@
+from bs4 import BeautifulSoup, Tag as HTMLTag
+
+# Saved results page; UTF-8 is assumed (TODO confirm) instead of the locale codec.
+with open("boroondara-city-council", 'r', encoding="utf-8") as results_fp:
+    html_doc = results_fp.read()
+soup = BeautifulSoup(html_doc, 'html.parser')
+candidates0 = soup.find_all(string="Successful candidates")
+candidates1 = soup.find_all(string="Elected candidates")
+
+def get_candidate_name(candidate):
+    """Return the stripped text of the first ``td.list-item-body`` cell found
+    in a tag sibling after ``candidate.parent``, or None when none has one."""
+    tags = (n for n in candidate.parent.next_siblings if isinstance(n, HTMLTag))
+    cells = (tag.find('td', class_="list-item-body") for tag in tags)
+    # Only the first matching cell is used; later siblings are never visited.
+    return next((cell.text.strip() for cell in cells if cell), None)
+
+# One entry per matched heading: "Successful candidates" matches first, then
+# "Elected candidates"; an entry is None when no name cell follows a heading.
+names = [
+    get_candidate_name(heading)
+    for heading in [*candidates0, *candidates1]
+]
+print(names)