From 464d617ecccc901435a9f2874388e4ea44e85f3a Mon Sep 17 00:00:00 2001
From: Kim Taylor
Date: Wed, 13 Nov 2024 07:24:05 +1100
Subject: [PATCH] Add parser to scrape winning candidates from VEC site.

---
 results/fetch.sh  |  7 +++++++
 results/parser.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100755 results/fetch.sh
 create mode 100644 results/parser.py

diff --git a/results/fetch.sh b/results/fetch.sh
new file mode 100755
index 0000000..6184537
--- /dev/null
+++ b/results/fetch.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Download the VEC results page for one council into a local file named after
+# the council; results/parser.py reads that file.
+
+council_name=boroondara-city-council
+
+wget "https://www.vec.vic.gov.au/voting/2024-local-council-elections/${council_name}/results" -O "${council_name}"
diff --git a/results/parser.py b/results/parser.py
new file mode 100644
index 0000000..734e593
--- /dev/null
+++ b/results/parser.py
@@ -0,0 +1,46 @@
+"""Print the winning candidates listed on a saved VEC council results page."""
+import sys
+
+from bs4 import BeautifulSoup, Tag as HTMLTag
+
+# Read as bytes so BeautifulSoup detects the page's own encoding instead of
+# relying on the platform's default text encoding.
+with open("boroondara-city-council", 'rb') as results_fp:
+    html_doc = results_fp.read()
+
+soup = BeautifulSoup(html_doc, 'html.parser')
+candidates0 = soup.find_all(string="Successful candidates")
+candidates1 = soup.find_all(string="Elected candidates")
+
+
+def get_candidate_name(candidate):
+    """Return the candidate name that follows a winners heading.
+
+    ``candidate`` is the matched heading string. The siblings after its parent
+    are scanned in order, and the stripped text of the first
+    ``<td class="list-item-body">`` found inside one of them is returned.
+    Returns None when no following sibling contains such a cell.
+    """
+    for sibling in candidate.parent.next_siblings:
+        # Only tags can be searched; skip bare strings between them.
+        if not isinstance(sibling, HTMLTag):
+            continue
+        if not (block := sibling.find('td', class_="list-item-body")):
+            continue
+        return block.text.strip()
+    return None
+
+
+names = []
+# All "Successful candidates" matches first, then all "Elected candidates".
+for candidate in [*candidates0, *candidates1]:
+    name = get_candidate_name(candidate)
+    if name is None:
+        # A heading with no name cell after it yields no winner; report it on
+        # stderr instead of printing None among the names.
+        print(f"warning: no candidate found after {candidate!r}",
+              file=sys.stderr)
+        continue
+    names.append(name)
+
+print(names)