Add parser to scrape winning candidates from VEC site.
This commit is contained in:
24
results/parser.py
Normal file
24
results/parser.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from bs4 import BeautifulSoup, Tag as HTMLTag
|
||||
|
||||
# Read the saved results page (a file named "boroondara-city-council" in the
# current working directory) into memory as text.
# NOTE(review): no explicit encoding is passed to open(), so the platform
# default is used -- confirm that matches how the page was saved.
with open("boroondara-city-council", 'r') as results_fp:
    html_doc = results_fp.read()

soup = BeautifulSoup(html_doc, 'html.parser')

# Look up the heading strings under both wordings; each search yields the
# matching text nodes in document order, "Successful candidates" first.
candidates0, candidates1 = (
    soup.find_all(string=heading)
    for heading in ("Successful candidates", "Elected candidates")
)
|
||||
|
||||
def get_candidate_name(candidate):
    """Return the candidate name that follows a heading text node.

    Walks the siblings that come after ``candidate.parent`` and, in the
    first tag sibling containing a ``<td class="list-item-body">`` cell,
    returns that cell's text with surrounding whitespace stripped.

    Args:
        candidate: A node from the parsed document whose ``parent`` is the
            heading element (as produced by ``soup.find_all(string=...)``).

    Returns:
        The stripped cell text, or ``None`` when no following sibling
        contains such a cell.
    """
    # Non-tag siblings (e.g. whitespace text between elements) cannot be
    # searched, so only tag siblings are considered.
    tag_siblings = (
        node
        for node in candidate.parent.next_siblings
        if isinstance(node, HTMLTag)
    )
    for node in tag_siblings:
        name_cell = node.find('td', class_="list-item-body")
        if name_cell:
            return name_cell.text.strip()
    return None
|
||||
|
||||
# Resolve each heading match to a name, keeping the original ordering:
# all "Successful candidates" matches first, then "Elected candidates".
# Entries are None where get_candidate_name found no name cell.
names = [
    get_candidate_name(heading)
    for heading in (*candidates0, *candidates1)
]

print(names)
|
||||
Reference in New Issue
Block a user