Implement CSV normaliser logic

This commit is contained in:
2024-08-15 22:10:03 +10:00
parent fea2f69686
commit d9cb183b2a

View File

@@ -1,69 +1,144 @@
<?php <?php
require_once "page_renderer.php"; $options = getopt("", ["input:", "output:", "media:"]);
$options = getopt("", ["council-file:", "candidates-file:", "media-file:"]); if (isset($options['input'])) {
$inputFile = $options['input'];
if (isset($options['council-file'])) {
$councilFileContents = file_get_contents($options['council-file']);
} else { } else {
error_log("Error: Missing required option '--council-file'."); error_log("Error: Missing required option '--input'.");
exit(1); exit(1);
} }
$councilData = json_decode($councilFileContents, true); if (isset($options['output'])) {
$outputFile = $options['output'];
// Check for decoding errors
if (json_last_error() !== JSON_ERROR_NONE) {
error_log('Error decoding council file: ' . json_last_error_msg());
exit(1);
}
if (isset($options['candidates-file'])) {
$candidatesFile = $options['candidates-file'];
} else { } else {
error_log("Error: Missing required option '--candidates-file'."); error_log("Error: Missing required option '--output'.");
exit(1); exit(1);
} }
// Convert CSV into an array of dictionaries. Use the header as the key in the dictionary. if (isset($options['media'])) {
$candidateData = []; $mediaFolder = $options['media'];
if (($handle = fopen($candidatesFile, "r")) !== FALSE) { } else {
$headers = fgetcsv($handle); error_log("Error: Missing required option '--media'.");
exit(1);
}
// Accumulates one normalised row per candidate found in the input CSV.
$candidates = [];

// List the media folder once up front; abort if it cannot be read.
$mediaFiles = scandir($mediaFolder);
if (false === $mediaFiles) {
    error_log("Failed to list files in media folder");
    exit(1);
}
if (($handle = fopen($inputFile, "r")) !== FALSE) {
$currentWard = null;
$currentLine = 0;
while (($data = fgetcsv($handle)) !== FALSE) { while (($data = fgetcsv($handle)) !== FALSE) {
$candidate = []; $currentLine++;
foreach ($headers as $key => $value) { //echo var_dump($data);
$candidate[$value] = $data[$key]; if ($data[0] == "Ward") {
// The CSV carries ward names in uppercase; title-case them for readability.
$currentWard = ucwords(strtolower($data[1]));
// ucwords() only capitalises after whitespace, so hyphenated wards whose
// second part is also capitalised need an explicit override. A blanket
// "uppercase after '-'" rule would be wrong for names like "Bulleke-bek".
$hyphenatedWardOverrides = [
    "Warrk-warrk" => "Warrk-Warrk",
    "Djirri-djirri" => "Djirri-Djirri",
    "Coastal-promontory" => "Coastal-Promontory",
];
if (isset($hyphenatedWardOverrides[$currentWard])) {
    $currentWard = $hyphenatedWardOverrides[$currentWard];
}
}
if ($data[0] == "Candidate") {
// A candidate row is only meaningful once a "Ward" row has been seen.
// Loose comparison is intentional here: it also rejects an empty ward name.
if (null == $currentWard) {
    error_log("No ward found, skipping data on line {$currentLine}");
    continue;
}

$candidateName = $data[1];

// Presumably a template/placeholder row in the source CSV (note the leading
// space in the literal) - confirm against the input data.
if (" example name" == $candidateName) {
    error_log("Skipping line {$currentLine}");
    continue;
}
// Wording fixed: the message previously read "Adding candidate to <name> to <ward>".
print("Adding candidate " . $candidateName . " to " . $currentWard . "\n");

// Find the candidate's picture by matching the parts of the name, in either
// order ("First Last" or "Last First"), against the media file names.
// Matching is case-insensitive and allows any characters between the parts.
// Empty parts (from repeated, leading or trailing spaces) are dropped so that
// an empty name cannot produce an empty pattern that matches every file.
$nameParts = array_values(array_filter(
    explode(" ", (string) $candidateName),
    static function ($part) {
        return $part !== "";
    }
));

$picture = "";
if ($nameParts !== []) {
    // Quote every part so regex metacharacters in a name (".", "(", "+", and
    // the "/" delimiter) are matched literally instead of breaking the pattern.
    $quotedParts = array_map(
        static function ($part) {
            return preg_quote($part, "/");
        },
        $nameParts
    );
    // preg_match() is unanchored, so no surrounding ".*" is required.
    $picturePattern = "/(?:" . implode(".*", $quotedParts) . ")"
        . "|(?:" . implode(".*", array_reverse($quotedParts)) . ")/i";

    foreach ($mediaFiles as $mediaFile) {
        if (preg_match($picturePattern, $mediaFile) === 1) {
            // First match wins.
            $picture = $mediaFile;
            break;
        }
    }
}

if ($picture === "") {
    // Trailing newline added so the next log line does not run into this one.
    print("Failed to identify picture for " . $candidateName . "\n");
}
// Record the normalised row for this candidate.
$candidates[] = [
    "Ward" => $currentWard,
    "Candidate Name" => $candidateName,
    "Rating" => $data[2],
    "Picture" => $picture,
];
} }
$candidateData[] = $candidate;
} }
fclose($handle); fclose($handle);
} else { } else {
error_log('Error opening candidates file'); error_log('Error opening input file');
exit(1); exit(1);
} }
$candidateData = array_filter($candidateData, function ($candidate) use ($councilData) { if (empty($candidates)) {
return isset($candidate["Council"]) && $candidate["Council"] === $councilData['shortName']; error_log("Failed to find any candidates");
});
if (empty($candidateData)) {
error_log("Failed to load any candidates for " . $councilData['shortName']);
}
if (isset($options['media-file'])) {
$mediaFileContents = file_get_contents($options['media-file']);
} else {
error_log("Error: Missing required option '--media-file'.");
exit(1);
}
$mediaData = json_decode($mediaFileContents, true);
$renderer = new SPLPageRenderer();
$pageContent = $renderer->renderCouncilPage($councilData, $candidateData, $mediaData);
if ($pageContent === null) {
exit(2); exit(2);
} }
echo $pageContent; if (($handle = fopen($outputFile, "w")) !== FALSE) {
// Column order of the output CSV. Each candidate row is keyed by these same
// names, so rows are emitted by walking this list rather than repeating it.
$headers = ["Ward", "Candidate Name", "Rating", "Picture"];
if (fputcsv($handle, $headers) === false) {
    error_log('Error writing headers to output file');
    exit(3);
}
foreach ($candidates as $candidate) {
    $fields = [];
    foreach ($headers as $column) {
        $fields[] = $candidate[$column];
    }
    if (fputcsv($handle, $fields) === false) {
        error_log('Error writing candidate to output file');
        exit(3);
    }
}
// Close the output explicitly (the input handle is closed the same way) so a
// failure while finalising the file is reported instead of being ignored.
if (fclose($handle) === false) {
    error_log('Error closing output file');
    exit(3);
}
} else {
error_log('Error opening output file');
exit(1);
}
// Terminate the status line so the shell prompt starts on a fresh line.
print("Data written to " . $outputFile . "\n");
exit(0); exit(0);