Add parser to scrape winning candidates from VEC site.

This commit is contained in:
Kim Taylor
2024-11-13 07:24:05 +11:00
parent af457dbd8c
commit 464d617ecc
2 changed files with 29 additions and 0 deletions

5
results/fetch.sh Executable file
View File

@@ -0,0 +1,5 @@
#!/bin/bash
# Download the VEC 2024 local council election results page for one council.
#
# Usage: fetch.sh [council-slug]
#   council-slug  Council path segment used in the VEC URL; defaults to
#                 boroondara-city-council.
#
# The page is saved in the current directory under the slug itself (no file
# extension).

# Abort on command failure, unset variables, or a failing pipeline stage.
set -euo pipefail

council_name="${1:-boroondara-city-council}"

# Quote both expansions so an unusual slug cannot be word-split or globbed.
wget "https://www.vec.vic.gov.au/voting/2024-local-council-elections/${council_name}/results" -O "${council_name}"

24
results/parser.py Normal file
View File

@@ -0,0 +1,24 @@
from bs4 import BeautifulSoup, Tag as HTMLTag
# Read the saved results page from the current directory. The encoding is
# given explicitly so decoding does not depend on the platform's default
# locale encoding.
# NOTE(review): assumes the saved page is UTF-8 - confirm against the download.
with open("boroondara-city-council", 'r', encoding="utf-8") as results_fp:
    html_doc = results_fp.read()

soup = BeautifulSoup(html_doc, 'html.parser')

# Collect the text nodes matching each heading; both wordings are searched
# and the matches are kept in separate lists.
candidates0 = soup.find_all(string="Successful candidates")
candidates1 = soup.find_all(string="Elected candidates")
def get_candidate_name(candidate):
    """Return the candidate name that follows a matched heading node.

    Walks the siblings that come after ``candidate.parent``, ignoring any
    that are not tags, and returns the whitespace-stripped text of the first
    ``<td class="list-item-body">`` found inside one of them.

    Args:
        candidate: A parsed node whose ``parent`` exposes ``next_siblings``
            (presumably a heading string matched by ``find_all`` - verify
            against the caller).

    Returns:
        The stripped cell text, or ``None`` when no following sibling
        contains such a cell.
    """
    # Text and comment nodes between elements are not tags; skip them.
    following_tags = (
        node
        for node in candidate.parent.next_siblings
        if isinstance(node, HTMLTag)
    )
    for tag in following_tags:
        name_cell = tag.find('td', class_="list-item-body")
        if name_cell:
            return name_cell.text.strip()
    return None
# Resolve one name per matched heading, taking every candidates0 match first
# and then every candidates1 match. An entry is None when
# get_candidate_name found no name cell for that heading.
names = []
for headings in (candidates0, candidates1):
    names.extend(get_candidate_name(heading) for heading in headings)
print(names)