Results parser working for all LGAs (except Melbourne)

This commit is contained in:
Kim Taylor
2024-11-16 12:03:29 +11:00
parent 464d617ecc
commit f8fd1cc20c
4 changed files with 235 additions and 17 deletions

View File

@@ -1,5 +1,15 @@
#!/bin/bash #!/bin/bash
council_name=boroondara-city-council mkdir -p html
wget https://www.vec.vic.gov.au/results/2024-council-election-results -O html/lga_list.html
wget https://www.vec.vic.gov.au/voting/2024-local-council-elections/$council_name/results -O $council_name IFS=$'\n'
# Download the results page of every LGA linked from the list page.
# grep -o emits one match per link (not per line), so several links on the
# same HTML line are all picked up, and reading line by line avoids relying
# on a global IFS setting for word splitting.
grep -o 'href="/voting/[^"]*/results"' html/lga_list.html | while IFS= read -r link; do
  # Strip the surrounding href="..." to leave the site-relative path.
  path=${link#href=\"}
  path=${path%\"}
  # The output file is named after the council slug, i.e. the path segment
  # between "elections/" and "/results".
  file=${path##*elections/}
  file=${file%/results}
  wget "https://www.vec.vic.gov.au$path" -O "html/$file"
done

158
results/gen-elected.php Normal file
View File

@@ -0,0 +1,158 @@
<?php
//require_once("parse_generic_csv.php");
/*
 * Read the required --candidates-files long option.
 * The script logs an error and exits with status 1 when it is absent.
 */
$options = getopt("", ["candidates-files:"]);
if (!isset($options['candidates-files'])) {
    error_log("Error: Missing required option '--candidates-files'.");
    exit(1);
}
$candidates_files = $options['candidates-files'];
/**
 * Turn a free-text value into a lookup slug.
 *
 * Surrounding whitespace is trimmed, every remaining space becomes a
 * hyphen, and the result is lower-cased.
 *
 * @param string $input Text to convert.
 * @return string The slug form of $input.
 */
function trim_sluggify($input) {
    $trimmed = trim($input);
    $hyphenated = strtr($trimmed, ' ', '-');
    return strtolower($hyphenated);
}
/* The option value is a single space-separated list of CSV paths. */
$candidates_files = explode(" ", $candidates_files);

/*
 * Generate dictionary of candidates and LGAs.
 *
 * Every row of every CSV becomes an associative array keyed by the CSV
 * header names, plus a 'Council' entry taken from the config.json that sits
 * in the same directory as the CSV. Rows are stored under the slug of their
 * 'Candidate Name', so a later file overwrites an earlier entry with the
 * same slug.
 */
$candidate_data = [];
foreach ($candidates_files as $file) {
    $config_file = dirname($file)."/config.json";
    $config_string = file_get_contents($config_file);
    if ($config_string === FALSE) {
        error_log("Error opening config.json.");
        exit(1);
    }
    $config = json_decode($config_string, true);
    if (!is_array($config)) {
        /* json_decode() returns null on malformed input. */
        error_log("Error parsing ".$config_file.".");
        exit(1);
    }

    if (($handle = fopen($file, "r")) === FALSE) {
        /* Keep processing the remaining files, but say which one was skipped. */
        error_log("Error opening ".$file.".");
        continue;
    }

    $headers = fgetcsv($handle);
    if (!is_array($headers)) {
        /* Empty file: there is no header row, so nothing can be read. */
        fclose($handle);
        continue;
    }

    while (($data = fgetcsv($handle)) !== FALSE) {
        /* fgetcsv() reports a blank line as an array holding a single null. */
        if ($data === [null]) continue;

        $candidate = [];
        foreach ($headers as $key => $value) {
            /* Short rows leave the missing columns as null. */
            $candidate[$value] = $data[$key] ?? null;
        }
        $candidate['Council'] = $config['councilName'] ?? null;

        /* A row without a name cannot be keyed, so it is skipped. */
        $name_slug = trim_sluggify($candidate['Candidate Name'] ?? '');
        if ($name_slug === '') continue;
        $candidate_data[$name_slug] = $candidate;
    }
    fclose($handle);
}
print_r($candidate_data);
/* Get list of elected candidates */
//$lga_list = [];
/* Generate dictionary of LGAs and Wards */
//foreach ($config_files as $config_file) {
// $config_string = file_get_contents($config_file);
// if ($config_string !== FALSE) {
// $config = json_decode($config_string, true);
// } else {
// error_log("Error opening config.json.");
// exit(1);
// }
// $config['config-file'] = $config_file;
// $lga_list[] = $config;
//}
/* Match user typed LGA/Ward to our database */
//match_lga($candidate_data, $lga_list);
$header = ["Ward", "Candidate Name", "Rating", "Pledge", "Picture"];
/* Generate candidates-generic.csv */
//foreach ($lga_list as $lga) {
// $lga_candidates = array_filter($candidate_data, function ($candidate) use ($lga) {
// return $candidate['match_lga'] === $lga['slug'];
// });
//
// if (count($lga_candidates) === 0) continue;
//
// remove_duplicates($lga_candidates);
//
// $dir = dirname($lga['config-file']);
// $dir_files = scandir($dir);
// $output_file = $dir."/candidates-generic.csv";
// $override_file = $dir."/candidates-override.csv";
//
// if (($handle = fopen($output_file, "w")) === FALSE) {
// error_log('Error opening output file');
// exit(1);
// }
//
// if (fputcsv($handle, $header) === FALSE) {
// error_log('Error writing headers to output file');
// exit(3);
// }
//
// $lines = [];
// foreach ($lga_candidates as $candidate) {
// /* Add extension to photo hash */
// if (strlen($candidate['Photo'])) {
// foreach ($dir_files as $file) {
// if (preg_match("/\.json$/", $file)) continue;
// if (strstr($file, $candidate['Photo'])) {
// $candidate['Photo'] = $file;
// }
// }
// }
//
// $lines[] = [
// $candidate['match_ward'],
// $candidate['Name'],
// $candidate['Score'],
// $candidate['Pledge'],
// $candidate['Photo'],
// ];
// }
//
// /* Apply overrides if they exist */
// $overrides = [];
// if (file_exists($override_file)) {
// if (($ovr_handle = fopen($override_file, "r")) !== FALSE) {
// $headers = fgetcsv($ovr_handle);
// while (($data = fgetcsv($ovr_handle)) !== FALSE) {
// $override = [];
// foreach ($headers as $key => $value) {
// $override[$value] = $data[$key];
// }
// $overrides[] = $override;
// }
// fclose($ovr_handle);
// } else {
// error_log('Error opening overrides file');
// exit(3);
// }
// }
//
// foreach ($overrides as $override) {
// foreach ($lines as $line_key => $line) {
// $match_index = array_search($override['Match Field'], $header);
// $replace_index = array_search($override['Replace Field'], $header);
// if ($line[$match_index] === $override['Match Value']) {
// if ($replace_index !== false)
// $lines[$line_key][$replace_index] = $override['Replace Value'];
// else /* If 'Replace Field' is not matched - delete this entry */
// $lines[$line_key]['Delete'] = 'y';
// }
// }
// }
//
// foreach ($lines as $line) {
// if (isset($line['Delete'])) continue;
// if (fputcsv($handle, $line) === FALSE) {
// error_log('Error writing candidate to output file');
// exit(3);
// }
// }
// fclose($handle);
//}
exit(0);

View File

@@ -1,24 +1,53 @@
from bs4 import BeautifulSoup, Tag as HTMLTag from bs4 import BeautifulSoup, Tag as HTMLTag
import argparse
import json
import os
import re
with open("boroondara-city-council", 'r') as results_fp: parser = argparse.ArgumentParser()
parser.add_argument('filenames', nargs='*')
args = parser.parse_args()
def get_vacancies(ward):
    """Extract the ward name and vacancy count from a ward's heading.

    The heading is the ``h2`` of the element two levels above *ward*. The
    ward name is the text before the first ``(``; the vacancy count is the
    first run of digits that directly follows a ``(``.

    Args:
        ward: Node whose grandparent carries the ward's ``h2`` heading.

    Returns:
        A ``(ward_name, vacancies, ward)`` tuple, with *ward* passed
        through unchanged.

    Raises:
        ValueError: If the heading contains no ``(<digits>`` group.
    """
    text = ward.parent.parent.h2.text
    # Raw string: "\(" in a plain literal is an invalid escape sequence.
    match = re.search(r"\(([0-9]+)", text)
    if match is None:
        raise ValueError(f"no vacancy count found in heading: {text!r}")
    ward_name = text.partition("(")[0].strip()
    vacancies = int(match[1])
    return (ward_name, vacancies, ward)
def get_candidate_names(ward_desc):
    """Collect the candidate names listed after a ward's marker node.

    Args:
        ward_desc: Sequence whose third item is the marker node; the
            siblings following that node's parent are searched.

    Returns:
        A list with the first line of the stripped text of every
        ``td.list-item-body`` cell found in those siblings, in document
        order.
    """
    marker = ward_desc[2]
    names = []
    for node in marker.parent.next_siblings:
        # Only tags can contain table cells; skip everything else.
        if isinstance(node, HTMLTag):
            cells = node.find_all('td', class_="list-item-body")
            # Keep only the text before the first newline of each cell.
            names.extend(
                cell.text.strip().partition('\n')[0] for cell in cells
            )
    return names
def parse_lga(filename):
with open(filename, 'r') as results_fp:
html_doc = results_fp.read() html_doc = results_fp.read()
soup = BeautifulSoup(html_doc, 'html.parser') soup = BeautifulSoup(html_doc, 'html.parser')
candidates0 = soup.find_all(string="Successful candidates") wards0 = soup.find_all(string="Successful candidates")
candidates1 = soup.find_all(string="Elected candidates") wards1 = soup.find_all(string="Elected candidates")
def get_candidate_name(candidate): ward_info = []
for sibling in candidate.parent.next_siblings: for ward in wards0:
if not isinstance(sibling, HTMLTag): ward_info.append(get_vacancies(ward))
continue for ward in wards1:
if not (block := sibling.find('td', class_="list-item-body")): ward_info.append(get_vacancies(ward))
continue
return block.text.strip()
names = [] results = {}
for candidate in candidates0: for ward in ward_info:
names.append(get_candidate_name(candidate)) names = get_candidate_names(ward)
for candidate in candidates1: assert len(names) == ward[1]
names.append(get_candidate_name(candidate)) results[ward[0]] = names
print(names) return results
# Parse every results page named on the command line and print a single
# JSON document mapping each LGA to the dictionary returned by parse_lga().
# Each LGA is keyed by the file's base name, so the key is the same whichever
# directory the pages were downloaded into.
all_results = {
    os.path.basename(lga): parse_lga(lga)
    for lga in args.filenames
}
print(json.dumps(all_results, indent=4))

21
update-elected.sh Executable file
View File

@@ -0,0 +1,21 @@
#!/bin/bash
# This script uses the jq, wp, and php commands, make sure they are installed before running this script.
# The folder containing data for each council.
# Includes the list of candidates and any media.
DATA_PATH="../spl-data"

# Collect the candidate CSVs found in each folder under the data path.
candidates_files=()
for folder in "$DATA_PATH"/*; do
  if [[ -f "$folder/candidates-generic.csv" ]]; then
    candidates_files+=("$folder/candidates-generic.csv")
  fi
  # Community groups get priority
  if [[ -f "$folder/candidates.csv" ]]; then
    candidates_files+=("$folder/candidates.csv")
  fi
done

# Stop with a clear message rather than handing PHP an empty file list.
if (( ${#candidates_files[@]} == 0 )); then
  echo "Error: no candidates CSV files found under $DATA_PATH" >&2
  exit 1
fi

# The list is passed as one space-separated argument.
php results/gen-elected.php --candidates-files "${candidates_files[*]}"