Generate candidates-generic.csv files based on fuzzy match with LGA/Ward.

This commit is contained in:
Kim Taylor
2024-09-22 19:46:39 +10:00
parent 2f5806a227
commit 219b242503
3 changed files with 131 additions and 6 deletions

View File

@@ -0,0 +1,20 @@
<?php
/*
 * Parse the survey CSV named by --generic-csv with parse_generic_csv()
 * and print the parsed data to standard output as JSON.
 *
 * Exit status: 0 on success; 1 when --generic-csv is missing (or was given
 * more than once) or when the parsed data cannot be encoded as JSON.
 */
require_once("parse_generic_csv.php");

$options = getopt("", ["generic-csv:"]);

/* getopt() returns an array for a repeated option, so insist on one string. */
if (!isset($options['generic-csv']) || !is_string($options['generic-csv'])) {
    error_log("Error: Missing required option '--generic-csv'.");
    exit(1);
}
$generic_csv = $options['generic-csv'];

$candidate_data = parse_generic_csv($generic_csv);

/* json_encode() returns false on failure (for example on invalid UTF-8).
 * Report it rather than printing nothing and exiting successfully. */
$json_data = json_encode($candidate_data);
if ($json_data === false) {
    error_log("Error: Could not encode candidate data as JSON: " . json_last_error_msg());
    exit(1);
}

echo $json_data;
exit(0);

View File

@@ -2,7 +2,7 @@
require_once("parse_generic_csv.php"); require_once("parse_generic_csv.php");
$options = getopt("", ["generic-csv:"]); $options = getopt("", ["generic-csv:", "config-files:"]);
if (isset($options['generic-csv'])) { if (isset($options['generic-csv'])) {
$generic_csv = $options['generic-csv']; $generic_csv = $options['generic-csv'];
@@ -11,8 +11,30 @@ if (isset($options['generic-csv'])) {
exit(1); exit(1);
} }
/* --config-files carries all config.json paths in ONE argument, separated by
 * spaces. getopt() returns an array for a repeated option, so insist on a
 * single string. */
if (!isset($options['config-files']) || !is_string($options['config-files'])) {
    error_log("Error: Missing required option '--config-files'.");
    exit(1);
}
/* Split on runs of spaces and drop empty entries, so an empty value or
 * doubled/trailing spaces do not turn into "" paths that fail to open. */
$config_files = preg_split('/ +/', $options['config-files'], -1, PREG_SPLIT_NO_EMPTY);
if ($config_files === false || count($config_files) === 0) {
    error_log("Error: Missing required option '--config-files'.");
    exit(1);
}
$candidate_data = parse_generic_csv($generic_csv); $candidate_data = parse_generic_csv($generic_csv);
$lga_list = [];
/* Generate dictionary of LGAs and Wards */
foreach ($config_files as $config_file) {
    $config_string = file_get_contents($config_file);
    if ($config_string === false) {
        error_log("Error opening config.json.");
        exit(1);
    }
    /* json_decode() returns null for malformed JSON and a scalar for a
     * non-object document. Neither can hold the 'slug'/'wardNames' entries
     * read later, so stop here instead of carrying a broken entry along. */
    $config = json_decode($config_string, true);
    if (!is_array($config)) {
        error_log("Error parsing config.json: " . $config_file);
        exit(1);
    }
    /* Keep the source path: the output CSV is written into its directory. */
    $config['config-file'] = $config_file;
    $lga_list[] = $config;
}
/* Calculate score for candidate */ /* Calculate score for candidate */
foreach ($candidate_data as &$candidate) { foreach ($candidate_data as &$candidate) {
$score = 0; $score = 0;
@@ -26,6 +48,81 @@ foreach ($candidate_data as &$candidate) {
$candidate['Score'] = $score; $candidate['Score'] = $score;
} }
foreach ($candidate_data as &$candidate) {
    /* Match user typed LGA/Ward to our database */

    /* Reset per candidate: without this an unmatched candidate silently
     * inherits the LGA/Ward chosen for the previous candidate. */
    $match_lga = null;
    $max_score = 0;

    /* Split the typed LGA into lower-case alphabetic tokens once per candidate. */
    $aa = preg_split("/[^a-z]/", strtolower($candidate['LGA']));
    foreach ($lga_list as $lga) {
        $bb = preg_split("/[^a-z]/", $lga['slug']);
        /* Every token pair adds its similarity percentage when above 70,
         * otherwise costs a flat 10 points. */
        $score_sum = 0;
        foreach ($aa as $a) {
            foreach ($bb as $b) {
                similar_text($a, $b, $score);
                if ($score > 70) {
                    $score_sum += $score;
                } else {
                    $score_sum -= 10;
                }
            }
        }
        if ($score_sum > $max_score) {
            $max_score = $score_sum;
            $match_lga = $lga;
        }
    }

    if ($match_lga === null) {
        /* No LGA scored above zero: mark the candidate as unmatched so the
         * strict slug comparison during CSV generation leaves it out. */
        error_log("Warning: no LGA match for '" . $candidate['LGA'] . "'.");
        $candidate['match_lga'] = null;
        $candidate['match_ward'] = "";
        continue;
    }

    /* Pick the ward of the matched LGA most similar to the typed ward.
     * Stays "" when the LGA lists no wards or nothing is similar at all. */
    $match_ward = "";
    $max_score = 0;
    foreach ($match_lga['wardNames'] ?? [] as $ward) {
        similar_text($ward, $candidate['Ward'], $score);
        if ($score > $max_score) {
            $max_score = $score;
            $match_ward = $ward;
        }
    }

    $candidate['match_lga'] = $match_lga['slug'];
    $candidate['match_ward'] = $match_ward;
}
/* Break the reference so later use of $candidate cannot overwrite the last
 * element of $candidate_data. */
unset($candidate);
/* Get picture */
foreach ($candidate_data as &$candidate) {
    /* No picture lookup yet: every candidate gets an empty placeholder. */
    $candidate['match_picture'] = "";
}
/* Break the reference: $candidate is reused further down as a by-value loop
 * variable, and assigning to it while it is still bound would overwrite the
 * last element of $candidate_data. */
unset($candidate);
$header = ["Ward", "Candidate Name", "Rating", "Picture"];

/* The loops above iterate $candidate_data by reference. Drop any binding that
 * is still alive, otherwise the by-value foreach below would write each row
 * into the last element of $candidate_data and corrupt it for later LGAs. */
unset($candidate);

/* Generate candidates-generic.csv */
foreach ($lga_list as $lga) {
    /* Candidates whose matched LGA slug is exactly this LGA's slug. */
    $lga_candidates = array_filter($candidate_data, function ($candidate) use ($lga) {
        return $candidate['match_lga'] === $lga['slug'];
    });
    if (count($lga_candidates) === 0) {
        continue;
    }

    /* The CSV is written into the directory holding this LGA's config file. */
    $output_file = dirname($lga['config-file'])."/candidates-generic.csv";
    if (($handle = fopen($output_file, "w")) === FALSE) {
        error_log('Error opening output file');
        exit(1);
    }
    if (fputcsv($handle, $header) === FALSE) {
        error_log('Error writing headers to output file');
        exit(3);
    }
    foreach ($lga_candidates as $candidate) {
        $fields = [
            $candidate['match_ward'],
            $candidate['Name'],
            $candidate['Score'],
            $candidate['match_picture'],
        ];
        if (fputcsv($handle, $fields) === FALSE) {
            error_log('Error writing candidate to output file');
            exit(3);
        }
    }
    /* Close each file once written instead of holding every handle open
     * until the script exits; a failed close can signal an unflushed write. */
    if (fclose($handle) === FALSE) {
        error_log('Error closing output file');
        exit(3);
    }
}
exit(0); exit(0);

View File

@@ -1,12 +1,20 @@
#!/bin/bash #!/bin/bash
DATA_LOC=../generic-survey
#rclone sync --progress bikewest:spl_generic_survey_2024 $DATA_LOC/google-data #rclone sync --progress bikewest:spl_generic_survey_2024 $DATA_LOC/google-data
#rclone --drive-export-formats csv copyto 'bikewest:spl_generic_survey_2024/Streets People Love council election candidate pledge and survey (Responses).csv' $DATA_LOC/responses.csv #rclone --drive-export-formats csv copyto 'bikewest:spl_generic_survey_2024/Streets People Love council election candidate pledge and survey (Responses).csv' $DATA_LOC/responses.csv
GENERIC_SURVEY=../generic-survey/responses.csv
#content=$(php pledge-update/pledge-page.php --candidates-files "${candidates_files[*]}") DATA_PATH="../spl-data"
php csv-generic/gen-generic.php --generic-csv $DATA_LOC/responses.csv config_files=()
# Collect the config.json of every entry under DATA_PATH that has one.
for folder in "$DATA_PATH"/*; do
    if [[ -f "$folder/config.json" ]]; then
        config_files+=("$folder/config.json")
    fi
done
#php csv-generic/csv-to-json.php --generic-csv $DATA_LOC/responses.csv
# "${config_files[*]}" joins all paths into ONE space-separated argument, so
# paths containing spaces cannot be passed this way.
# GENERIC_SURVEY is quoted so the path survives word splitting and globbing.
php csv-generic/gen-generic.php --generic-csv "$GENERIC_SURVEY" --config-files "${config_files[*]}"