diff --git a/student_auto_feed/add_drop_report.php b/student_auto_feed/add_drop_report.php new file mode 100644 index 0000000..b6ec03a --- /dev/null +++ b/student_auto_feed/add_drop_report.php @@ -0,0 +1,388 @@ +#!/usr/bin/env php +go(); +exit; + +/** + * Main process class + * + * @param array $argv + */ +class add_drop_report { + + /** @var string "Pass" as in which pass is being run: "1" or "2" */ + private $pass; + + /** @var string academic term / semester code. e.g. "f21" for Fall 2021 */ + private $term; + + public function __construct($argv) { + $this->pass = $argv[1]; + $this->term = $argv[2]; + } + + public function __destruct() { + db::close(); + } + + /** Main process flow + * + * $argv[1] = "1": First run to read course list and cache results to CSV temp file + * $argv[1] = "2": Second run to compare cached results with database and make report + */ + public function go() { + switch($this->pass) { + case "1": + // Record current course enrollments to temporary CSV + db::open(); + $courses = db::get_courses($this->term); + $mapped_courses = db::get_mapped_courses($this->term); + $enrollments = db::count_enrollments($this->term, $courses, $mapped_courses); + $course_enrollments = $enrollments[0]; + // ----------------------------------------------------------------- + reports::write_temp_csv($course_enrollments); + return null; + case "2": + // Read temporary CSV and compile and send add/drop report. 
+ db::open(); + $courses = db::get_courses($this->term); + $mapped_courses = db::get_mapped_courses($this->term); + $enrollments = db::count_enrollments($this->term, $courses, $mapped_courses); + $course_enrollments = $enrollments[0]; + $manual_flags = $enrollments[1]; + // ----------------------------------------------------------------- + $prev_course_enrollments = reports::read_temp_csv(); + $report = reports::compile_report($prev_course_enrollments, $course_enrollments, $manual_flags); + reports::send_report($this->term, $report); + return null; + default: + die("Unrecognized pass \"{$this->pass}\"\n"); + } + } +} + +/** Static callback functions used with array_walk() */ +class callbacks { + /** Convert string to lowercase */ + public static function strtolower_cb(&$val, $key) { $val = strtolower($val); } + + /** Convert array to CSV data (as string) */ + public static function str_getcsv_cb(&$val, $key) { $val = str_getcsv($val, CSV_DELIM_CHAR); } +} + +/** Database static class */ +class db { + /** @var resource DB connection resource */ + private static $db = null; + + /** Open connection to DB */ + public static function open() { + // constants defined in config.php + $user = DB_LOGIN; + $host = DB_HOST; + $password = DB_PASSWORD; + + self::$db = pg_connect("host={$host} dbname=submitty user={$user} password={$password} sslmode=prefer"); + if (!self::check()) { + die("Failed to connect to DB\n"); + } + } + + /** Close connection to DB */ + public static function close() { + if (self::check()) { + pg_close(self::$db); + } + } + + /** + * Verify that DB connection resource is OK + * + * @access private + * @return bool true when DB connection resource is OK, false otherwise. 
+ */ + private static function check() { + return is_resource(self::$db) && pg_connection_status(self::$db) === PGSQL_CONNECTION_OK; + } + + /** + * Retrieve course list from DB's courses table + * + * @param string $term + * @return string[] + */ + public static function get_courses($term) { + if (!self::check()) { + die("Not connected to DB when querying course list\n"); + } + + // Undergraduate courses from DB. + $sql = "SELECT course FROM courses WHERE semester=$1 AND status=1"; + $params = array($term); + $res = pg_query_params(self::$db, $sql, $params); + if ($res === false) + die("Failed to retrieve course list from DB\n"); + $course_list = pg_fetch_all_columns($res, 0); + array_walk($course_list, 'callbacks::strtolower_cb'); + + return $course_list; + } + + /** + * Retrieve mapped courses from DB's mapped_courses table + * + * @param $term + * @return string[] [course] => mapped_course + */ + public static function get_mapped_courses($term) { + if (!self::check()) { + die("Not connected to DB when querying mapped courses list\n"); + } + + // mapped courses from DB + $sql = "SELECT course, mapped_course FROM mapped_courses WHERE semester=$1"; + $params = array($term); + $res = pg_query_params(self::$db, $sql, $params); + if ($res === false) { + die("Failed to retrieve mapped courses from DB\n"); + } + + $keys = pg_fetch_all_columns($res, 0); + array_walk($keys, 'callbacks::strtolower_cb'); + $vals = pg_fetch_all_columns($res, 1); + array_walk($vals, 'callbacks::strtolower_cb'); + $mapped_courses = array_combine($keys, $vals); + + return $mapped_courses; + } + + /** + * Retrieve number of students (1) with manual flag set, (2) enrolled in courses + * + * @param $term + * @param $course_list + * @param $mapped_courses + * @return int[] ([0] => course enrollment counts, [1] => manual flag counts) + */ + public static function count_enrollments($term, $course_list, $mapped_courses) { + if (!self::check()) { + die("Not connected to DB when querying course 
enrollments\n"); + } + + $course_enrollments = array(); + $manual_flags = array(); + + foreach ($course_list as $course) { + $grad_course = array_search($course, $mapped_courses); + if ($grad_course === false) { + // COURSE HAS NO GRAD SECTION (not mapped). + $sql = "SELECT COUNT(*) FROM courses_users WHERE semester=$1 AND course=$2 AND user_group=4 AND registration_section IS NOT NULL"; + $params = array($term, $course); + $res = pg_query_params(self::$db, $sql, $params); + if ($res === false) + die("Failed to lookup enrollments for {$course}\n"); + $course_enrollments[$course] = (int) pg_fetch_result($res, 0); + + // Get manual flag count + $sql = "SELECT COUNT(*) FROM courses_users WHERE semester=$1 AND course=$2 AND user_group=4 AND registration_section IS NOT NULL AND manual_registration=TRUE"; + $res = pg_query_params(self::$db, $sql, $params); + if ($res === false) + die("Failed to lookup counts with manual flag set for {$course}\n"); + $manual_flags[$course] = (int) pg_fetch_result($res, 0); + } else { + // UNDERGRADUATE SECTION + $sql = "SELECT COUNT(*) FROM courses_users WHERE semester=$1 AND course=$2 AND user_group=4 AND registration_section='1'"; + $params = array($term, $course); + $res = pg_query_params(self::$db, $sql, $params); + if ($res === false) + die("Failed to lookup enrollments for {$course}\n"); + $course_enrollments[$course] = (int) pg_fetch_result($res, 0); + + // Get manual flag count + $sql = "SELECT COUNT(*) FROM courses_users WHERE semester=$1 AND course=$2 AND user_group=4 AND registration_section='1' AND manual_registration=TRUE"; + $res = pg_query_params(self::$db, $sql, $params); + if ($res === false) + die("Failed to lookup counts with manual flag set for {$course} (undergrads)\n"); + $manual_flags[$course] = (int) pg_fetch_result($res, 0); + + // GRADUATE SECTION + $sql = "SELECT COUNT(*) FROM courses_users WHERE semester=$1 AND course=$2 AND user_group=4 AND registration_section='2'"; + $res = pg_query_params(self::$db, $sql, 
$params); + if ($res === false) + die("Failed to lookup enrollments for {$grad_course}\n"); + $course_enrollments[$grad_course] = (int) pg_fetch_result($res, 0); + + // Get manual flag count + $sql = "SELECT COUNT(*) FROM courses_users WHERE semester=$1 AND course=$2 AND user_group=4 AND registration_section='2' AND manual_registration=TRUE"; + $res = pg_query_params(self::$db, $sql, $params); + if ($res === false) + die("Failed to lookup counts with manual flag set for {$course} (grads)\n"); + $manual_flags[$grad_course] = (int) pg_fetch_result($res, 0); + } + } + + // Courses make up array keys. Sort by courses. + ksort($course_enrollments); + ksort($manual_flags); + return array($course_enrollments, $manual_flags); + } +} + +/** Reports related methods */ +class reports { + /** + * Write course enrollment counts to temporary CSV file + * + * @param $course_enrollments + */ + public static function write_temp_csv($course_enrollments) { + $today = date("ymd"); + $tmp_path = ADD_DROP_FILES_PATH . "tmp/"; + $tmp_file = "{$today}.tmp"; + + if (!is_dir($tmp_path)) { + if (!mkdir($tmp_path, 0770, true)) { + die("Can't create tmp folder.\n"); + } + } + + $fh = fopen($tmp_path . $tmp_file, "w"); + if ($fh === false) { + die("Could not create temp file.\n"); + } + + foreach($course_enrollments as $course=>$num_students) { + fputcsv($fh, array($course, $num_students), CSV_DELIM_CHAR); + } + fclose($fh); + chmod($tmp_path . $tmp_file, 0660); + } + + /** + * Read temporary CSV file. Delete it when done. + * + * @return string[] "previous" course list of [course] => num_students + */ + public static function read_temp_csv() { + $today = date("ymd"); + $tmp_path = ADD_DROP_FILES_PATH . "tmp/"; + $tmp_file = "{$today}.tmp"; + + $csv = file($tmp_path . $tmp_file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); + if ($csv === false) { + die("Could not read temp file to prepare report.\n"); + } + + unlink($tmp_path . $tmp_file); // remove tmp file. 
+ array_walk($csv, 'callbacks::str_getcsv_cb'); + // return array of array('course' => enrollment). e.g. ('csci1000' => 100) + return array_combine(array_column($csv, 0), array_column($csv, 1)); + } + + /** + * Compile $report from params' data + * + * @param $prev_course_enrollments + * @param $course_enrollments + * @param $manual_flags + * @return string $report + */ + public static function compile_report($prev_course_enrollments, $course_enrollments, $manual_flags) { + // Compile stats + $date = date("F j, Y"); + $time = date("g:i A"); + $report = "Student autofeed counts report for {$date} at {$time}\n"; + $report .= "NOTE: Difference and ratio do not account for the manual flag.\n"; + $report .= "COURSE YESTERDAY TODAY MANUAL DIFFERENCE RATIO\n"; + + foreach ($course_enrollments as $course=>$course_enrollment) { + // Calculate data + $prev_course_enrollment = array_key_exists($course, $prev_course_enrollments) ? $prev_course_enrollments[$course] : 0; + $manual_flag = array_key_exists($course, $manual_flags) ? $manual_flags[$course] : 0; + $diff = $course_enrollment - $prev_course_enrollment; + $ratio = $prev_course_enrollment != 0 ? abs(round(($diff / $prev_course_enrollment), 3)) : "N/A"; + + // Align into columns + $course = str_pad($course, 18, " ", STR_PAD_RIGHT); + $prev_course_enrollment = str_pad($prev_course_enrollment, 5, " ", STR_PAD_LEFT); + $course_enrollment = str_pad($course_enrollment, 5, " ", STR_PAD_LEFT); + $manual_flag = str_pad($manual_flag, 6, " ", STR_PAD_LEFT); + $diff = str_pad($diff, 10, " ", STR_PAD_LEFT); + + // Add row to report. + $report .= "{$course}{$prev_course_enrollment} {$course_enrollment} {$manual_flag} {$diff} {$ratio}\n"; + } + + return $report; + } + + /** + * Write $report to file. Optionally send $report by email. + * + * Email requires sendmail (or equivalent) installed and configured in php.ini. + * Emails are sent "unauthenticated". 
+ * + * @param $term + * @param $report + */ + public static function send_report($term, $report) { + // Email stats (print stats if email is null or otherwise not sent) + if (!is_null(ADD_DROP_TO_EMAIL)) { + $date = date("M j, Y"); + $to = ADD_DROP_TO_EMAIL; + $from = ADD_DROP_FROM_EMAIL; + $subject = "Submitty Autofeed Add/Drop Report For {$date}"; + $report = str_replace("\n", "\r\n", $report); // needed for email formatting + $is_sent = mail($to, $subject, $report, array('from' => $from)); + if (!$is_sent) { + $report = str_replace("\r\n", "\n", $report); // revert back since not being emailed. + fprintf(STDERR, "Add/Drop report could not be emailed.\n%s", $report); + } + } + + // Write report to file. + $path = ADD_DROP_FILES_PATH . $term . "/"; + if (!is_dir($path)) { + if (!mkdir($path, 0770, true)) { + die("Cannot create reports path {$path}.\n"); + } + } + + $today = date("Y-m-d"); + file_put_contents("{$path}report_{$today}.txt", $report); + chmod("{$path}report_{$today}.txt", 0660); + } +} + +// EOF +?> diff --git a/student_auto_feed/config.php b/student_auto_feed/config.php index 79a40b8..72d73ab 100644 --- a/student_auto_feed/config.php +++ b/student_auto_feed/config.php @@ -5,7 +5,7 @@ * config.php script used by submitty_student_auto_feed * By Peter Bailie, Systems Programmer (RPI dept of computer science) * - * Requires minimum PHP version 7.0 with pgsql and iconv extensions. + * Requires minimum PHP version 7.1 with pgsql extension. * * Configuration of submitty_student_auto_feed is structured through a series * of named constants. @@ -36,7 +36,6 @@ // Univeristy campus's timezone. ***THIS NEEDS TO BE SET. date_default_timezone_set('Etc/UTC'); - /* Definitions for error logging -------------------------------------------- */ // While not recommended, email reports of errors may be disabled by setting // 'ERROR_EMAIL' to null. 
Please ensure the server running this script has @@ -44,17 +43,24 @@ define('ERROR_EMAIL', 'sysadmins@lists.myuniversity.edu'); define('ERROR_LOG_FILE', '/var/local/submitty/bin/auto_feed_error.log'); +//Properties for database access. ***THESE NEED TO BE SET. +define('DB_HOST', 'submitty.cs.myuniversity.edu'); +define('DB_LOGIN', 'my_database_user'); //DO NOT USE IN PRODUCTION +define('DB_PASSWORD', 'my_database_password'); //DO NOT USE IN PRODUCTION -//Student registration status is important, as data dumps can contain students -//who have dropped a course either before the semester starts or during the -//semester. This serialized array will contain all valid registered-student -//codes can be expected in the data dump. -//***THIS NEEDS TO BE SET as a serialized array. +// CSV_FILE is the full path of the student auto feed file, regardless of whether it is +// accessed locally or remotely. +define('CSV_FILE', '/path/to/datafile.csv'); + +// Student registration status is important, as data dumps can contain students +// who have dropped a course either before the semester starts or during the +// semester. This array will contain all valid registered-student codes that can be +// expected in the data dump. // -//IMPORTANT: Consult with your University's IT administrator and/or registrar to -// add all pertinant student-is-registered codes that can be found in -// your CSV data dump. EXAMPLE: 'RA' may mean "registered by advisor" -// and 'RW' may mean "registered via web" +// IMPORTANT: Consult with your University's IT administrator and/or registrar +// to add all pertinent student-is-registered codes that can be found +// in your CSV data dump. EXAMPLE: 'RA' may mean "registered by +// advisor" and 'RW' may mean "registered via web" define('STUDENT_REGISTERED_CODES', array('RA', 'RW')); //An exceptionally small file size can indicate a problem with the feed, and @@ -68,24 +74,15 @@ //CSV that are not needed by submitty_student_auto_feed). 
define('VALIDATE_NUM_FIELDS', 10); -// The following constants are used to read the CSV auto feed file provided by -// the registrar / data warehouse. ***THESE NEED TO BE SET. -// -// CSV_FILE is the full path of the student auto feed file, regardless if it is -// accessed locally or remotely. -define('CSV_FILE', '/path/to/datafile.csv'); +//What ratio of dropped student enrollments is suspiciously too high, which may +//indicate a problem with the CSV data file -- between 0 and 1. Set to NULL to +//disable this check. +define('VALIDATE_DROP_RATIO', 0.5); //Define what character is delimiting each field. ***THIS NEEDS TO BE SET. //EXAMPLE: chr(9) is the tab character. define('CSV_DELIM_CHAR', chr(9)); -//Properties for database access. ***THESE NEED TO BE SET. -//If multiple instances of Submitty are being supported, these may be defined as -//parrallel arrays. -define('DB_HOST', 'submitty.cs.myuniversity.edu'); -define('DB_LOGIN', 'my_database_user'); //DO NOT USE IN PRODUCTION -define('DB_PASSWORD', 'my_database_password'); //DO NOT USE IN PRODUCTION - /* The following constants identify what columns to read in the CSV dump. --- */ //these properties are used to group data by individual course and student. //NOTE: If your University does not support "Student's Preferred Name" in its @@ -108,6 +105,9 @@ //Header row, if it exists, must be discarded during processing. define('HEADER_ROW_EXISTS', true); +//Allows "\r" EOL encoding. This is rare but exists (e.g. Excel for Macintosh). +ini_set('auto_detect_line_endings', true); + //Remote IMAP //This is used by imap_remote.php to login and retrieve a student enrollment //datasheet, should datasheets be provided via an IMAP email box. 
This also @@ -142,14 +142,20 @@ define('JSON_REMOTE_PASSWORD', 'json_password'); //DO NOT USE IN PRODUCTION define('JSON_REMOTE_PATH', '/path/to/files/'); -//Sometimes data feeds are generated by Windows systems, in which case the data -//file probably needs to be converted from Windows-1252 (aka CP-1252) to UTF-8. -//Set to true to convert data feed file from Windows char encoding to UTF-8. -//Set to false if data feed is already provided in UTF-8. -define('CONVERT_CP1252', true); +// Add/Drop Reporting +// Where to email reports. Set to null to disable sending email. +// Sendmail (or equivalent) needs to be installed on the server and configured +// in php.ini. Reports are sent "unauthenticated". +define('ADD_DROP_TO_EMAIL', "admin@cs.myuniversity.edu"); -//Allows "\r" EOL encoding. This is rare but exists (e.g. Excel for Macintosh). -ini_set('auto_detect_line_endings', true); +// Where emailed reports are sent from. +// Doesn't actually have to be an account on the server running this script, so +// a common email address or mailing list for sysadmins is fine. +define('ADD_DROP_FROM_EMAIL', "sysadmins@lists.myuniversity.edu"); + +// Base dir where reports are written. They will be further sorted to sub dirs +// 'tmp' or the current semester code. +define('ADD_DROP_FILES_PATH', "path/to/reports/"); //EOF ?> diff --git a/student_auto_feed/readme.md b/student_auto_feed/readme.md index d0c6ccc..80e8114 100644 --- a/student_auto_feed/readme.md +++ b/student_auto_feed/readme.md @@ -1,5 +1,5 @@ # Submitty Student Auto Feed Script -Readme last updated Sept 22, 2021 +Readme last updated Nov 17, 2021 This is a code example for any University to use as a basis to have Submitty course's enrollment data added or updated on an automated schedule with a student enrollment CSV datasheet. 
@@ -10,11 +10,9 @@ policies and practices.__ Detailed instructions can be found at [http://submitty.org/sysadmin/student\_auto\_feed](http://submitty.org/sysadmin/student_auto_feed) -Please use PHP 7.0 or higher. - -## config.php -A series of `define` statements that is used to configure the auto feed script. -Code comments will help explain usage. +Requirements: PHP 7.1 or higher with pgsql extension. `imap_remote.php` also +requires the imap extension. This system is intended to be platform agnostic, +but has been developed and tested with Ubuntu Linux. ## submitty\_student\_auto\_feed.php A command line executable script to read a student enrollment data CSV file and @@ -38,8 +36,6 @@ The auto feed script does not need to be run specifically on the Submitty server, but it will need access to the Submitty "master" database and the enrollment CSV data file. -Requires PHP's pgsql and iconv extensions. - ### Command Line Arguments `-t` Specify the term code for the currently active term. Required. @@ -56,6 +52,20 @@ This overrides database authentication set in config.php. Optional. `-l` Test log reporting. This can be used to test that logs are being sent/delivered by email. This does not process a data CSV. Optional. +## config.php +A series of `define` statements that is used to configure the auto feed script. +Code comments will help explain usage. This file must exist in the same directory +as `submitty_student_auto_feed.php`. + +## Class files +`ssaf_cli.php` +`ssaf_db.php` +`ssaf_sql.php` +`ssaf_validate.php` + +These are class files that are required to run the submitty student auto feed +script. They must exist in the same directory as `submitty_student_auto_feed.php`. + ## imap\_remote.php This is a helper script that will retrieve a CSV data sheet from an imap email @@ -64,8 +74,9 @@ account. The data retrieved is written to the same path and file used by and the data should be available to `submitty_student_auto_feed.php` for processing. 
-Configuration is read from `config.php`. No command line options. Requires the -PHP imap extension. +Configuration is read from `config.php`. No command line options. + +__Requires the PHP imap extension.__ ## json\_remote.php @@ -101,3 +112,36 @@ For Example: This will first run `imap_remote.php` to retrieve student enrollment data, then run `submitty_student_auto_feed.php` with command line arguments `-t s18` and `-a dbuser:dbpassword@dbserver.edu`. + +## add_drop_report.php + +Script used to compile reports on how many students have dropped among all +courses registered in Submitty. + +This script should be run before the autofeed and again after the autofeed. +The first run will read the database and write a temporary file of course +enrollment numbers. The second run will read the temporary file and compare +it with the enrollment numbers in the database -- which may have changed. + +The enrollment report will be saved as a text file. Optionally, this report +can be emailed. Note that the email function requires `sendmail` or equivalent, +and the emails will be sent unauthenticated. + +### Command Line Parameters + +The first cli parameter must be either `1` or `2` to designate whether this is +the first (prior to autofeed) or second (after auto feed) run. + +Second cli parameter is the term code. + +For example: +``` +$ ./add_drop_report.php 1 f21 +``` +Will invoke the _first_ run to cache enrollment values to a temporary file for +the Fall 2021 term. +``` +$ ./add_drop_report.php 2 f21 +``` +Will invoke the _second_ run to create the report of student enrollments for the +Fall 2021 term. 
diff --git a/student_auto_feed/ssaf_cli.php b/student_auto_feed/ssaf_cli.php new file mode 100644 index 0000000..1ae11ad --- /dev/null +++ b/student_auto_feed/ssaf_cli.php @@ -0,0 +1,47 @@ + diff --git a/student_auto_feed/ssaf_db.php b/student_auto_feed/ssaf_db.php new file mode 100644 index 0000000..6d0e85b --- /dev/null +++ b/student_auto_feed/ssaf_db.php @@ -0,0 +1,265 @@ + $row['mapped_course'], + 'mapped_section' => $row['mapped_section'] + ); + } + } + + return $mappings; + } + + /** + * Get student enrollment count for a specific semester and course. + * + * @param string $semester + * @param string $course + * @return bool|string Enrollment count (as string) or FALSE on DB error. + */ + public static function get_enrollment_count($semester, $course) { + self::$error = null; + if (!self::check()) { + return false; + } + + $results = self::run_query(sql::GET_COURSE_ENROLLMENT_COUNT, array($semester, $course)); + if ($results === false) { + self::$error .= "Error while retrieving course enrollment counts."; + return false; + } + + return $results[0]['num_students']; + } + + /** + * Upsert $rows to Submitty master database. + * + * If an error occurs, this function returns FALSE. self::run_query() will + * set the error reported by PostgreSQL, and this function will concat more + * context. + * Switch/case blocks need to check each run_query() case explicitly for + * false as switch/case does not do strict comparisons, and run_query() can + * return null (inferred as false) when a query has no errors and also + * produces no results (mainly non-SELECT queries). + * + * @param string $semester Term course code. e.g. "f20" for Fall 2020. + * @param string $course Course code related to $rows. + * @param array $rows Data rows read from CSV file to be UPSERTed to database + * @return bool TRUE on success, FALSE on error. 
+ */ + public static function upsert($semester, $course, $rows) : bool { + self::$error = null; + if (!self::check()) { + return false; + } + + // Setup DB transaction. + // If any query returns false, we need to bail out before upserting. + switch(true) { + case self::run_query(sql::CREATE_TMP_TABLE, null) === false: + self::$error .= "\nError during CREATE tmp table, {$course}"; + return false; + case self::run_query(sql::BEGIN, null) === false: + self::$error .= "\nError during BEGIN transaction, {$course}"; + return false; + case self::run_query(sql::LOCK_COURSES, null) === false: + self::$error .= "\nError during LOCK courses table, {$course}"; + return false; + case self::run_query(sql::LOCK_REG_SECTIONS, null) === false: + self::$error .= "\nError during LOCK courses_registration_sections table, {$course}"; + return false; + case self::run_query(sql::LOCK_COURSES_USERS, null) === false: + self::$error .= "\nError during LOCK courses_users table, {$course}"; + return false; + } + + // Do upsert of course enrollment data. + foreach($rows as $row) { + $users_params = array( + $row[COLUMN_USER_ID], + $row[COLUMN_NUMERIC_ID], + $row[COLUMN_FIRSTNAME], + $row[COLUMN_LASTNAME], + $row[COLUMN_PREFERREDNAME], + $row[COLUMN_EMAIL] + ); + + $courses_users_params = array( + $semester, + $course, + $row[COLUMN_USER_ID], + 4, + $row[COLUMN_SECTION], + "FALSE" + ); + + $reg_sections_params = array($semester, $course, $row[COLUMN_SECTION]); + $tmp_table_params = array($row[COLUMN_USER_ID]); + $dropped_users_params = array($semester, $course); + + // Upsert queries + // If any query returns false, we need to rollback and bail out. 
+ switch(true) { + case self::run_query(sql::UPSERT_USERS, $users_params) === false: + self::run_query(sql::ROLLBACK, null); + self::$error .= "\nError during UPSERT users table, {$course}\n"; + return false; + case self::run_query(sql::INSERT_REG_SECTION, $reg_sections_params) === false: + self::run_query(sql::ROLLBACK, null); + self::$error .= "\nError during INSERT courses_registration_sections table, {$course}\n"; + return false; + case self::run_query(sql::UPSERT_COURSES_USERS, $courses_users_params) === false: + self::run_query(sql::ROLLBACK, null); + self::$error .= "\nError during UPSERT courses_users table, {$course}\n"; + return false; + case self::run_query(sql::INSERT_TMP_TABLE, $tmp_table_params) === false: + self::run_query(sql::ROLLBACK, null); + self::$error .= "\nError during INSERT temp table (enrolled student who hasn't dropped), {$course}\n"; + return false; + } + } // END row by row processing. + + // Finish up by checking for dropped students. + if (self::run_query(sql::DROPPED_USERS, $dropped_users_params) === false) { + self::run_query(sql::ROLLBACK, null); + self::$error .= "\nError processing dropped students, {$course}\n"; + return false; + } + + // All data has been upserted. Complete transaction and return success or failure. + return self::run_query(sql::COMMIT, null) !== false; + } + + // PRIVATE STATIC FUNCTIONS ------------------------------------------------ + + private static function check() : bool { + if (!is_resource(self::$db) || pg_connection_status(self::$db) !== PGSQL_CONNECTION_OK) { + self::$error = "No DB connection."; + return false; + } + + return true; + } + + /** + * Run SQL query with parameters + * + * Uses pg_query_params() to run the query to help ensure that pertinent + * data is properly escaped. Returns NULL when there are no results, such + * as with an INSERT or UPDATE query. Be careful that FALSE and NULL are + * equivalent when loosely compared. 
+ * + * @param string $sql SQL query + * @param array $params Query parameters + * @return mixed FALSE on error. Array of results or NULL on success. + */ + private static function run_query($sql, $params = null) { + if (!self::check()) { + return false; + } + + if (is_null($params)) $params = array(); + else if (!is_array($params)) $params = array($params); + + $res = pg_query_params(self::$db, $sql, $params); + if ($res === false) { + // pg_result_error() doesn't work here as $res is no longer a result resource. + self::$error = pg_last_error(self::$db); + return false; + } + + $result = pg_fetch_all($res, PGSQL_ASSOC); + return $result !== false ? $result : null; + } +} + +// EOF +?> diff --git a/student_auto_feed/ssaf_sql.php b/student_auto_feed/ssaf_sql.php new file mode 100644 index 0000000..9a22015 --- /dev/null +++ b/student_auto_feed/ssaf_sql.php @@ -0,0 +1,133 @@ + diff --git a/student_auto_feed/ssaf_validate.php b/student_auto_feed/ssaf_validate.php new file mode 100644 index 0000000..a3c6a0a --- /dev/null +++ b/student_auto_feed/ssaf_validate.php @@ -0,0 +1,155 @@ +@\"[] + 2. Address recipient may not start or end with characters !#$%'*+-/=?^_`{| + 3. Address recipient and hostname must be delimited with @ character + 4. Address hostname labels are delimited by the . character + 5. Address hostname labels may contain alphanumeric and - character + 6. Address hostname labels may not start or end with - character + 7. Address top level domain may be alphabetic only and minimum length of 2 + 8. The entire email address is case insensitive + + Peter Bailie, Oct 29 2021 +----------------------------------------------------------------------------- */ + +namespace ssaf; + +/** @author Peter Bailie, Rensselaer Polytechnic Institute */ +class validate { + + /** @var null|string $error Holds any generated error messages */ + public static $error = null; + + /** + * Validate $row is suitable for processing. 
+ * + * trim() to remove whitespace is applied to all columns. trim() to remove + * trailing zeroes is applied to the registration section column, when + * column is all numeric characters. + * + * @param array $row Data row to validate. + * @param int $row_num Data row number from CSV file (for error messaging). + * @return bool TRUE when $row is successfully validated, FALSE otherwise. + */ + public static function validate_row($row, $row_num) : bool { + self::$error = null; + $validate_num_fields = VALIDATE_NUM_FIELDS; + $num_fields = count($row); + + switch(false) { + // Make sure $row has the expected number of fields. + case $num_fields === $validate_num_fields: + self::$error = "Row {$row_num} has {$num_fields} columns. {$validate_num_fields} expected."; + return false; + // Check term code (skips when set to null). + case is_null(EXPECTED_TERM_CODE) ? true : $row[COLUMN_TERM_CODE] === EXPECTED_TERM_CODE: + self::$error = "Row {$row_num} failed validation for unexpected term code \"{$row[COLUMN_TERM_CODE]}\"."; + return false; + // User ID must contain only lowercase alpha, numbers, underscore, and hyphen + case boolval(preg_match("/^[a-z0-9_\-]+$/", $row[COLUMN_USER_ID])): + self::$error = "Row {$row_num} failed user ID validation \"{$row[COLUMN_USER_ID]}\"."; + return false; + // First name must be alpha characters, white-space, or certain punctuation. + case boolval(preg_match("/^[a-zA-Z'`\-\. ]+$/", $row[COLUMN_FIRSTNAME])): + self::$error = "Row {$row_num} failed validation for student first name \"{$row[COLUMN_FIRSTNAME]}\"."; + return false; + // Last name must be alpha characters, white-space, or certain punctuation. + case boolval(preg_match("/^[a-zA-Z'`\-\. ]+$/", $row[COLUMN_LASTNAME])): + self::$error = "Row {$row_num} failed validation for student last name \"{$row[COLUMN_LASTNAME]}\"."; + return false; + // Student registration section must be alphanumeric, '_', or '-'. 
+ case boolval(preg_match("/^[a-zA-Z0-9_\-]+$/", $row[COLUMN_SECTION])): + self::$error = "Row {$row_num} failed validation for student section \"{$row[COLUMN_SECTION]}\"."; + return false; + // Check email address is properly formed. + case boolval(preg_match("/^(?![!#$%'*+\-\/=?^_`{|])[^(),:;<>@\\\"\[\]]+(? $b[COLUMN_USER_ID]; }); + + $user_ids = array(); + $are_all_unique = true; // Unless proven FALSE + $length = count($rows); + for ($i = 1; $i < $length; $i++) { + $j = $i - 1; + if ($rows[$i][COLUMN_USER_ID] === $rows[$j][COLUMN_USER_ID]) { + $are_all_unique = false; + $user_ids[] = $rows[$i][COLUMN_USER_ID]; + } + } + + return $are_all_unique; + } + + /** + * Validate that there isn't an excessive drop ratio in course enrollments. + * + * An excessive ratio of dropped enrollments may indicate a problem with + * the data sheet. Dropped enrollments can be either indicated by a row + * with a unacceptable registration code, or more critically THAT USER'S + * DATA ROW WAS OMITTED FROM THE DATASHEET. In the latter case, it is + * difficult to tell when missing data is regular or improper. Therefore + * this check relies on the config setting VALIDATE_DROP_RATIO as a + * confidence setting to indicate that processing must be aborted to + * (possibly? probably?) preserve data integrity. Returns TRUE when + * validation is OK. That is, ratio of dropped students is within + * confidence. Or the ratio did not go beyond the cutoff. + * + * @param array $rows Data rows for one course + * @param string $term Current term code. e.g. 'f21' for Fall 2021. + * @param string $course Course code for course's data. + * @return bool TRUE when validation OK, FALSE when validation failed. + */ + public static function check_for_excessive_dropped_users(array $rows, string $term, string $course, &$diff, &$ratio) : bool { + // This check is disabled when VALIDATE_DROP_RATIO is set NULL. 
+ if (is_null(VALIDATE_DROP_RATIO)) return true; + + $ratio_cutoff = VALIDATE_DROP_RATIO * -1; + $current_enrollments = (int) db::get_enrollment_count($term, $course); + $new_enrollments = count($rows); + + /* --------------------------------------------------------------------- + Dropped students shows a reduction in enrollment, and therefore the + difference will be a negative value to calculate the ratio, resulting + in a negative ratio. A calculated ratio that is *smaller* or equals + the cutoff fails validation. A positive ratio indicates students + adding the course, in which case validation is OK. + + If $current_enrollments are 0, the course is empty of students and + there can be no dropped students. Also, division by 0. + Only possible response is TRUE (validate OK), so set to 1.0 to ensure + ratio is always higher than the cutoff. + --------------------------------------------------------------------- */ + $diff = $new_enrollments - $current_enrollments; + $ratio = $current_enrollments !== 0 ? 
$diff / $current_enrollments : 1.0; + return $ratio > $ratio_cutoff; + } +} + +// EOF +?> diff --git a/student_auto_feed/submitty_student_auto_feed.php b/student_auto_feed/submitty_student_auto_feed.php index 740fe71..3b7dc48 100755 --- a/student_auto_feed/submitty_student_auto_feed.php +++ b/student_auto_feed/submitty_student_auto_feed.php @@ -1,76 +1,58 @@ #!/usr/bin/env php array(), 'courses_users' => array()); +// Important: Make sure we are running from CLI +if (php_sapi_name() !== "cli") { + die("This is a command line tool."); +} - /** @static @property string $log_msg_queue ongoing string of messages to write to logfile */ - private static $log_msg_queue = ""; +$proc = new submitty_student_auto_feed(); +$proc->go(); +exit; +/** primary process class */ +class submitty_student_auto_feed { + /** @var resource File handle to read CSV */ + private $fh; + /** @var string Semester code */ + private $semester; + /** @var array List of courses registered in Submitty */ + private $course_list; + /** @var array Describes how courses are mapped from one to another */ + private $mapped_courses; + /** @var array Courses with invalid data. */ + private $invalid_courses; + /** @var array All CSV data to be upserted */ + private $data; + /** @var string Ongoing string of messages to write to logfile */ + private $log_msg_queue; + + /** Init properties. Open DB connection. Open CSV file. */ public function __construct() { - - //Important: Make sure we are running from CLI - if (PHP_SAPI !== "cli") { - die("This is a command line tool."); - } - - //Get semester from CLI arguments. + // Get semester from CLI arguments. $opts = cli_args::parse_args(); if (array_key_exists('l', $opts)) { $this->log_it("Logging test requested. There is no actual error to report."); exit; } + $this->semester = $opts['t']; - self::$semester = $opts['t']; - - //Connect to "master" submitty DB. + // Connect to "master" submitty DB. 
if (array_key_exists('a', $opts)) { $db_user = strtok($opts['a'], ":"); $db_password = strtok("@"); @@ -80,389 +62,267 @@ public function __construct() { $db_password = DB_PASSWORD; $db_host = DB_HOST; } - self::$db = pg_connect("host={$db_host} dbname=submitty user={$db_user} password={$db_password} sslmode=require"); - //Make sure there's a DB connection to Submitty. - if (pg_connection_status(self::$db) !== PGSQL_CONNECTION_OK) { - $this->log_it("Error: Cannot connect to submitty DB"); - } else { - //Get course list - self::$course_list = $this->get_participating_course_list(); - - //Create arrays to hold enrollment data by course. - foreach (self::$course_list as $course) { - self::$data['courses_users'][$course] = array(); - } - - //Get mapped_courses list (when one class is merged into another) - self::$course_mappings = $this->get_course_mappings(); - - //Auto-run class processes by executing them in constructor. - //Halts when FALSE is returned by a method. - switch(false) { - //Load CSV data - case $this->open_csv(): - $this->log_it("Student CSV data could not be read."); - exit(1); - //Validate CSV data (anything pertinent is stored in self::$data property) - case $this->validate_csv(): - $this->log_it("Student CSV data failed validation. No data upsert performed."); - exit(1); - //Data upsert - case $this->upsert_psql(): - $this->log_it("Error during upsert of data."); - exit(1); - } + if (!$this->open_csv()) { + $this->log_it("Error: Cannot open CSV file"); + exit(1); } - //END EXECUTION - exit(0); - } - - public function __destruct() { - - //Graceful cleanup. - - //Close DB connection, if it exists. - if (pg_connection_status(self::$db) === PGSQL_CONNECTION_OK) { - pg_close(self::$db); + if (!db::open($db_host, $db_user, $db_password)) { + $this->log_it("Error: Cannot connect to Submitty DB"); + exit(1); } - //Unlock CSV, if it is locked. 
- if (self::$fh_locked) { - flock(self::$fh, LOCK_UN); + // Get course list + $error = null; + $this->course_list = db::get_course_list($this->semester, $error); + if ($this->course_list === false) { + $this->log_it($error); + exit(1); } - //Close CSV file, if it is open. - if (self::$fh !== false) { - fclose(self::$fh); + // Get mapped_courses list (when one class is merged into another) + $this->mapped_courses = db::get_mapped_courses($this->semester, $error); + if ($this->mapped_courses === false) { + $this->log_it($error); + exit(1); } + // Init other properties. + $this->invalid_courses = array(); + $this->data = array(); + $this->log_msg_queue = ""; + } + + public function __destruct() { + db::close(); + $this->close_csv(); + //Output logs, if any. - if (!empty(self::$log_msg_queue)) { + if ($this->log_msg_queue !== "") { + error_log($this->log_msg_queue, 3, ERROR_LOG_FILE); // to file if (!is_null(ERROR_EMAIL)) { - $is_sent = error_log(self::$log_msg_queue, 1, ERROR_EMAIL); //to email - fprintf(STDERR, "%s", self::$log_msg_queue); + $is_sent = error_log($this->log_msg_queue, 1, ERROR_EMAIL); // to email if (!$is_sent) { - $this->log_it("Error log could not be sent by email."); - fprintf(STDERR, "Error log could not be sent by email.%s", PHP_EOL); + // This gives cron a chance to email the log to a sysadmin. + fprintf(STDERR, "PHP could not send error log by email.\n%s", $this->log_msg_queue); } } + } + } - error_log(self::$log_msg_queue, 3, ERROR_LOG_FILE); //to file + /** Main process workflow */ + public function go() { + switch(false) { + case $this->get_csv_data(): + $this->log_it("Error getting CSV data."); + break; + case $this->check_for_excessive_dropped_users(): + // This check will block all upserts when an error is detected. 
+ exit(1); + case $this->check_for_duplicate_user_ids(): + $this->log_it("Duplicate user IDs detected in CSV file."); + break; + case $this->invalidate_courses(): + // Should do nothing when $this->invalid_courses is empty + $this->log_it("Error when removing data from invalid courses."); + break; + case $this->upsert_data(): + $this->log_it("Error during upsert."); + break; } } /** - * Run some error checks and copy file data to class property. + * Read CSV file and sort data into $this->data. + * + * The sorting process includes ensuring the data row is associated with an + * active course in Submitty and that the data row passes a series of + * validation checks. When a row fails validation, we flag that course in + * $this->invalid_courses, which later will be used to prevent that course + * from being upserted to preserve data integrity. * - * @access private - * @return boolean indicates success that CSV data passes validation tests + * @see validate::validate_row() Row validation method in ssaf_validate.php */ - private function validate_csv() { - - if (self::$fh === false) { - $this->log_it("CSV file handle invalid when starting CSV data validation."); + private function get_csv_data() { + if (!is_resource($this->fh) || get_resource_type($this->fh) !== "stream") { + $this->log_it("CSV file not open when get_csv_data() called."); return false; } - //Consume and discard header row, if it exists, and init $row_number. + // Consume/discard header row, if it exists. if (HEADER_ROW_EXISTS) { - fgets(self::$fh); - $row_number = 1; + fgets($this->fh); + $row_num = 2; } else { - $row_number = 0; + $row_num = 1; } - //Prepare validation - //$validation_flag will invalidate the entire CSV when set to false. - //A log of all failing rows is desired, so we do not bail out of this process at the first sign of invalidation. 
- $validation_flag = true; - $validate_num_fields = VALIDATE_NUM_FIELDS; - $rpi_found_non_empty_row = false; //RPI edge case flag where top row(s) of CSV might have empty data. - - while (($row = fgetcsv(self::$fh, 0, CSV_DELIM_CHAR)) !== false) { - //Current row number (needed for error logging). - $row_number++; - + // Read and assign csv rows into $this->data array + $row = fgetcsv($this->fh, 0, CSV_DELIM_CHAR); + while(!feof($this->fh)) { //Trim whitespace from all fields in $row - array_walk($row, 'trim'); - - //BEGIN VALIDATION - //Invalidate any row that doesn't have requisite number of fields. Do this, first. - //Invalidation will disqualify the data file to protect DB data integrity. - $num_fields = count($row); - if ($num_fields !== $validate_num_fields) { - $this->log_it("Row {$row_number} has {$num_fields} columns. {$validate_num_fields} expected. CSV disqualified."); - $validation_flag = false; - continue; - } else if (empty(array_filter($row, function($field) { return !empty($field); }))) { - //RPI edge case to skip a correctly sized row of all empty fields — at the top of a data file, before proper data is read — without invalidating the whole data file. - if (!$rpi_found_non_empty_row) { - $this->log_it("Row {$row_number} is correct size ({$validate_num_fields}), but all columns are empty — at top of CSV. Ignoring row."); - continue; - } else { - //Correctly sized empty row below data row(s) — invalidate data file. - $this->log_it("Row {$row_number} is correct size ({$validate_num_fields}), but all columns are empty — below a non-empty data row. CSV disqualified."); - $validation_flag = false; - continue; - } - } + array_walk($row, function(&$val, $key) { $val = trim($val); }); - $rpi_found_non_empty_row = true; - $course = strtolower($row[COLUMN_COURSE_PREFIX]) . $row[COLUMN_COURSE_NUMBER]; // Remove any leading zeroes from "integer" registration sections. - $section = (ctype_digit($row[COLUMN_SECTION])) ? 
ltrim($row[COLUMN_SECTION], "0") : $row[COLUMN_SECTION]; - - //Row validation filters. If any prove false, row is discarded. - switch(false) { - //Check to see if course is participating in Submitty or a mapped course. - case (in_array($course, self::$course_list) || array_key_exists($course, self::$course_mappings)): - continue 2; - //Check that row shows student is registered. - case (in_array($row[COLUMN_REGISTRATION], STUDENT_REGISTERED_CODES)): - continue 2; - } - - //Row is OK, next validate row columns. - //If any column is invalid, the row is skipped and the entire data file is disqualified. - switch(false) { - //Check term code (skips when set to null). - case ((is_null(EXPECTED_TERM_CODE)) ? true : ($row[COLUMN_TERM_CODE] === EXPECTED_TERM_CODE)): - $this->log_it("Row {$row_number} failed validation for mismatched term code."); - $validation_flag = false; - continue 2; - //User ID must contain only lowercase alpha, numbers, underscore, and hyphen - case boolval((preg_match("~^[a-z0-9_\-]+$~", $row[COLUMN_USER_ID]))): - $this->log_it("Row {$row_number} failed user ID validation ({$row[COLUMN_USER_ID]})."); - $validation_flag = false; - continue 2; - //First name must be alpha characters, white-space, or certain punctuation. - case boolval((preg_match("~^[a-zA-Z'`\-\. ]+$~", $row[COLUMN_FIRSTNAME]))): - $this->log_it("Row {$row_number} failed validation for student first name ({$row[COLUMN_FIRSTNAME]})."); - $validation_flag = false; - continue 2; - //Last name must be alpha characters, white-space, or certain punctuation. - case boolval((preg_match("~^[a-zA-Z'`\-\. ]+$~", $row[COLUMN_LASTNAME]))): - $this->log_it("Row {$row_number} failed validation for student last name ({$row[COLUMN_LASTNAME]})."); - $validation_flag = false; - continue 2; - //Student registration section must be alphanumeric, '_', or '-'. 
- case boolval((preg_match("~^[a-zA-Z0-9_\-]+$~", $row[COLUMN_SECTION]))): - $this->log_it("Row {$row_number} failed validation for student section ({$section})."); - $validation_flag = false; - continue 2; - //Check email address for appropriate format. e.g. "student@university.edu", "student@cs.university.edu", etc. - case boolval((preg_match("~^[^(),:;<>@\\\"\[\]]+@(?!\-)[a-zA-Z0-9\-]+(?log_it("Row {$row_number} failed validation for student email ({$row[COLUMN_EMAIL]})."); - $validation_flag = false; - continue 2; - } - - /* ----------------------------------------------------------------- - * $row successfully validated. Include it. - * NOTE: Most cases, $row is associated EITHER as a registered - * course or as a mapped course, but it is possible $row is - * associated as BOTH a registered course and a mapped course. - * -------------------------------------------------------------- */ - - //Include $row in self::$data as a registered course, if applicable. - if (in_array($course, self::$course_list)) { - $this->include_row($row, $course, $section); - } - - //Include $row in self::$data as a mapped course, if applicable. - if (array_key_exists($course, self::$course_mappings)) { - if (array_key_exists($section, self::$course_mappings[$course])) { - $tmp_course = $course; - $tmp_section = $section; - $course = self::$course_mappings[$tmp_course][$tmp_section]['mapped_course']; - $section = self::$course_mappings[$tmp_course][$tmp_section]['mapped_section']; - $this->include_row($row, $course, $section); - } else { - //Course mapping is needed, but section is not correctly entered in DB. - //Invalidate data file so that upsert is not performed as a safety precaution for system data integrity. - $this->log_it("Row {$row_number}: {$course} has been mapped. 
Section {$section} is in feed, but not mapped."); - $validation_flag = false; + if (ctype_digit($row[COLUMN_SECTION])) $row[COLUMN_SECTION] = ltrim($row[COLUMN_SECTION], "0"); + + $course = strtolower($row[COLUMN_COURSE_PREFIX] . $row[COLUMN_COURSE_NUMBER]); + + // Does $row have a valid registration code? + if (array_search($row[COLUMN_REGISTRATION], STUDENT_REGISTERED_CODES) !== false) { + // Check that $row is associated with the course list + if (array_search($course, $this->course_list) !== false) { + if (validate::validate_row($row, $row_num)) { + $this->data[$course][] = $row; + } else { + $this->invalid_courses[$course] = true; + $this->log_it(validate::$error); + } + // Instead, check that the $row is associated with mapped course + } else if (array_key_exists($course, $this->mapped_courses)) { + $section = $row[COLUMN_SECTION]; + // Also verify that the section is mapped. + if (array_key_exists($section, $this->mapped_courses[$course])) { + $m_course = $this->mapped_courses[$course][$section]['mapped_course']; + if (validate::validate_row($row, $row_num)) { + $row[COLUMN_SECTION] = $this->mapped_courses[$course][$section]['mapped_section']; + $this->data[$m_course][] = $row; + } else { + $this->invalid_courses[$m_course] = true; + $this->log_it(validate::$error); + } + } } } - } //END iterating over CSV data. - - //Bulk of proccesing time is during database upsert, so we might as well - //release the CSV now that we are done reading it. 
- if (self::$fh_locked && flock(self::$fh, LOCK_UN)) { - self::$fh_locked = false; - } - if (self::$fh !== false && fclose(self::$fh)) { - self::$fh = false; + $row = fgetcsv($this->fh, 0, CSV_DELIM_CHAR); + $row_num++; } /* --------------------------------------------------------------------- - * In the event that a course is registered with Submitty, but that - * course is NOT in the CSV data, that course needs to be removed - * (filtered out) from self::$data or else all of its student enrollment - * will be moved to the NULL section during upsert. This is determined - * when a course has zero rows of student enrollment. - * ------------------------------------------------------------------ */ + There may be "fake" or "practice" courses in Submitty that shouldn't be + altered by the autofeed. These courses will have no enrollments in the + csv file as these courses are not recognized by the registrar. + --------------------------------------------------------------------- */ - self::$data['courses_users'] = array_filter(self::$data['courses_users'], function($course) { return !empty($course); }, 0); + // Filter out any "empty" courses so they are not processed. + // There shouldn't be any "empty" course data, but this is just in case. + $this->data = array_filter($this->data, function($course) { return !empty($course); }, 0); - /* --------------------------------------------------------------------- - * Individual students can be listed on multiple rows if they are - * enrolled in two or more courses. 'users' table needs to be - * deduplicated. Deduplication will be keyed by 'user_id' since that is - * also the table's primary key. Note that 'courses_users' should NOT - * be deduplicated. - * ------------------------------------------------------------------ */ - - if ($this->deduplicate('users', 'user_id') === false) { - - //Deduplication didn't work. We can't proceed (set validation flag to false). 
- $this->log_it("Users data deduplication encountered a problem. Aborting."); - $validation_flag = false; - } + // Most runtime involves the database, so we'll release the CSV now. + $this->close_csv(); - //TRUE: Data validation passed and validated data set will have at least 1 row per table. - //FALSE: Either data validation failed or at least one table is an empty set. - return ($validation_flag && count(self::$data['users']) > 0 && count(self::$data['courses_users']) > 0); + // Done. + return true; } /** - * Add $row to self::$data. + * Users cannot be registered to the same course multiple times. * - * This should only be called AFTER $row is successfully validated. + * Any course with a user registered more than once is flagged invalid as + * it is indicative of data errors from the CSV file. * - * @param array $row data row to include - * @param string $course course associated with data row - * @param string $section section associated with data row + * @return bool always TRUE */ - private function include_row($row, $course, $section) { - - self::$data['users'][] = array('user_id' => $row[COLUMN_USER_ID], - 'user_numeric_id' => $row[COLUMN_NUMERIC_ID], - 'user_firstname' => $row[COLUMN_FIRSTNAME], - 'user_preferredname' => $row[COLUMN_PREFERREDNAME], - 'user_lastname' => $row[COLUMN_LASTNAME], - 'user_email' => $row[COLUMN_EMAIL]); - - //Group 'courses_users' data by individual courses, so - //upserts can be transacted per course. This helps prevent - //FK violations blocking upserts for other courses. - self::$data['courses_users'][$course][] = array('semester' => self::$semester, - 'course' => $course, - 'user_id' => $row[COLUMN_USER_ID], - 'user_group' => 4, - 'registration_section' => $section, - 'manual_registration' => 'FALSE'); - } + private function check_for_duplicate_user_ids() { + foreach($this->data as $course => $rows) { + $user_ids = null; + // returns FALSE (as in there is an error) when duplicate IDs are found. 
+ if (validate::check_for_duplicate_user_ids($rows, $user_ids) === false) { + $this->invalid_courses[$course] = true; + $msg = "Duplicate user IDs detected in {$course} data: "; + $msg .= implode(", ", $user_ids); + $this->log_it($msg); + } + } - /** - * Retrieves a list of participating courses. - * - * Submitty can handle multiple courses. This function retrieves a list of - * participating courses from the database. - * - * @access private - * @return array list of courses registered in Submitty - */ - private function get_participating_course_list() { + return true; + } - //EXPECTED: self::$db has an active/open Postgres connection. - if (pg_connection_status(self::$db) !== PGSQL_CONNECTION_OK) { - $this->log_it("Error: not connected to Submitty DB when retrieving active course list."); - return false; + private function check_for_excessive_dropped_users() { + $is_validated = true; + $invalid_courses = array(); // intentional local array + $ratio = 0; + $diff = 0; + foreach($this->data as $course => $rows) { + if (!validate::check_for_excessive_dropped_users($rows, $this->semester, $course, $diff, $ratio)) { + $invalid_courses[] = array('course' => $course, 'diff' => $diff, 'ratio' => round(abs($ratio), 3)); + $is_validated = false; + } } - //SQL query code to get course list. - $sql = << $b['course']; }); + $msg = "The following course(s) have an excessive ratio of dropped students.\n Stats show mapped courses combined in base courses.\n"; + array_unshift($invalid_courses, array('course' => "COURSE", 'diff' => "DIFF", 'ratio' => "RATIO")); // Header + foreach ($invalid_courses as $invalid_course) { + $msg .= " " . + str_pad($invalid_course['course'], 18, " ", STR_PAD_RIGHT) . + str_pad($invalid_course['diff'], 6, " ", STR_PAD_LEFT) . + str_pad($invalid_course['ratio'], 8, " ", STR_PAD_LEFT) . 
+ PHP_EOL; + } + $msg .= " No upsert performed on any/all courses in Submitty due to suspicious data sheet."; - //Error check - if ($res === false) { - $this->log_it("RETRIEVE PARTICIPATING COURSES : " . pg_last_error(self::$db)); + $this->log_it($msg); return false; } - //Return course list. - return pg_fetch_all_columns($res, 0); + return true; } /** - * Merge mapped courses into one - * - * Sometimes a course is combined with undergrads/grad students, or a course - * is crosslisted, but meets collectively. Course mappings will "merge" - * courses into a single master course. This can also be used to duplicate - * course enrollment from one course to another (e.g. an intro course may - * duplicate enrollment to an optional extra-lessons pseudo-course). + * Call db::upsert to process CSV data to DB * - * @access private - * @return array a list of "course mappings" (where one course is merged into another) + * @return bool Always true */ - private function get_course_mappings() { - - //EXPECTED: self::$db has an active/open Postgres connection. - if (pg_connection_status(self::$db) !== PGSQL_CONNECTION_OK) { - $this->log_it("Error: not connected to Submitty DB when retrieving course mappings list."); - return false; - } - - //SQL query code to retrieve course mappinsg - $sql = <<log_it("RETRIEVE MAPPED COURSES : " . pg_last_error(self::$db)); - return false; + private function upsert_data() { + foreach ($this->data as $course => $rows) { + if (db::upsert($this->semester, $course, $rows) === false) { + $this->log_it(db::$error); + } } - //Check for no mappings returned. - $results = pg_fetch_all($res); - if (empty($results)) { - return array(); - } + // Done. + return true; + } - //Describe how auto-feed data is translated by mappings. 
- $mappings = array(); - foreach ($results as $row) { - $course = $row['course']; - $registration_section = $row['registration_section']; - $mapped_course = $row['mapped_course']; - $mapped_section = $row['mapped_section']; - $mappings[$course][$registration_section] = array('mapped_course' => $mapped_course, - 'mapped_section' => $mapped_section); + /** + * Remove process records for a specific course due to a problem with CSV data. + * + * If a problem with CSV data is detected, the entire course will not be + * processed to preserve data integrity. This is done by removing all + * course related records from $this->data. + * Both $this->data and $this->invalid_courses are indexed by course code, + * so removing course data is trivially accomplished by array_diff_key(). + * + * @param string $course Course being removed from process records. + */ + private function invalidate_courses() { + if (!empty($this->invalid_courses)) { + // Remove course data for invalid courses. + $this->data = array_diff_key($this->data, $this->invalid_courses); + + // Log what courses have been flagged invalid. + $msg = "The following courses were not processed: "; + $msg .= implode(", ", array_keys($this->invalid_courses)); + $this->log_it($msg); } - return $mappings; + // Done. + return true; } /** * Open auto feed CSV data file. * - * @access private - * @return boolean indicates success/failure of opening and locking CSV file. + * @return boolean Indicates success/failure of opening and locking CSV file. 
*/ private function open_csv() { - - self::$fh = fopen(CSV_FILE, "r"); - if (self::$fh !== false) { - if (flock(self::$fh, LOCK_SH, $wouldblock)) { - self::$fh_locked = true; + $this->fh = fopen(CSV_FILE, "r"); + if ($this->fh !== false) { + if (flock($this->fh, LOCK_SH, $wouldblock)) { return true; } else if ($wouldblock === 1) { $this->logit("Another process has locked the CSV."); @@ -472,384 +332,27 @@ private function open_csv() { return false; } } else { - $this->log_it("Could not open CSV file. Check config."); + $this->log_it("Could not open CSV file."); return false; } } - /** - * deduplicate data set by a specific column - * - * Users table in "Submitty" database must have a unique student per row. - * per row. Students in multiple courses may have multiple entries where - * where deduplication is necessary. - * - * @access private - * @param array $subset data subset to be deduplicated - * @param mixed $key column by which rows are deduplicated - * @return boolean TRUE when deduplication is completed. FALSE when sorting fails. - */ - private function deduplicate($subset = 'users', $key = 'user_id') { - - // First, sort data subset. On success, remove duplicate rows identified by $key. - if (usort(self::$data[$subset], function($a, $b) use ($key) { return strcmp($a[$key], $b[$key]); })) { - $count = count(self::$data[$subset]); - for ($i = 1; $i < $count; $i++) { - if (self::$data[$subset][$i][$key] === self::$data[$subset][$i-1][$key]) { - unset(self::$data[$subset][$i-1]); - } - } - - //Indicate that deduplication is done. - return true; + /** Close CSV file */ + private function close_csv() { + if (is_resource($this->fh) && get_resource_type($this->fh) === "stream") { + fclose($this->fh); } - - //Something went wrong during sort. Abort and indicate failure. - return false; - } - - /** - * "Update/Insert" data into the database. Code works via "batch" upserts. - * - * Vars assigned NULL are 'inactive' placeholders for readability. 
- * - * @access private - * @return boolean true when upsert is complete - */ - private function upsert_psql() { - $sql = array('begin' => 'BEGIN', - 'commit' => 'COMMIT', - 'rollback' => 'ROLLBACK'); - - //TEMPORARY tables to hold all new values that will be "upserted" - $sql['users']['temp_table'] = <<log_it("USERS (UPDATE) : " . pg_last_error(self::$db)); - pg_query(self::$db, $sql['rollback']); - break; - case pg_query(self::$db, $sql['users']['insert']): - $this->log_it("USERS (INSERT) : " . pg_last_error(self::$db)); - pg_query(self::$db, $sql['rollback']); - break; - default: - pg_query(self::$db, $sql['commit']); - break; - } - - //'courses_registration_sections' table - //'SELECT semesters' MUST be processed before 'courses_users' - //in order to satisfy database referential integrity. - foreach(self::$data['courses_users'] as $course_name => $course_data) { - pg_query(self::$db, $sql['begin']); - pg_query(self::$db, $sql['registration_section']['temp_table']); - //fills temp table with batch upsert data. - foreach ($course_data as $row) { - pg_query_params(self::$db, $sql['registration_section']['data'], array($row['semester'], $row['course'], $row['registration_section'])); - } - pg_query(self::$db, $sql['registration_section']['lock']); - switch (false) { - case pg_query(self::$db, $sql['registration_section']['insert']): - $this->log_it("REGISTRATION SECTION IDs (INSERT) : " . pg_last_error(self::$db)); - pg_query(self::$db, $sql['rollback']); - break; - default: - pg_query(self::$db, $sql['commit']); - break; - } - } - - //Process 'courses_users' tables (per course). - foreach(self::$data['courses_users'] as $course_name => $course_data) { - pg_query(self::$db, $sql['begin']); - pg_query(self::$db, $sql['courses_users']['temp_table']); - pg_query(self::$db, $sql['registration_section']['temp_table']); - //fills registration_section temp table with batch upsert data. - //fills courses_users temp table with batch upsert data. 
- foreach($course_data as $row) { - pg_query_params(self::$db, $sql['registration_section']['data'], array($row['semester'], $row['course'], $row['registration_section'])); - pg_query_params(self::$db, $sql['courses_users']['data'], $row); - } - pg_query(self::$db, $sql['courses_users']['lock']); - switch (false) { - case pg_query(self::$db, $sql['registration_section']['insert']): - pg_query(self::$db, $sql['rollback']); - break; - case pg_query(self::$db, $sql['courses_users']['update']): - $this->log_it(strtoupper($course_name) . " (UPDATE) : " . pg_last_error(self::$db)); - pg_query(self::$db, $sql['rollback']); - break; - case pg_query(self::$db, $sql['courses_users']['insert']): - $this->log_it(strtoupper($course_name) . " (INSERT) : " . pg_last_error(self::$db)); - pg_query(self::$db, $sql['rollback']); - break; - case pg_query_params(self::$db, $sql['courses_users']['dropped_students'], array($course_name, self::$semester)): - $this->log_it(strtoupper($course_name) . " (DROPPED STUDENTS) : " . pg_last_error(self::$db)); - pg_query(self::$db, $sql['rollback']); - break; - default: - pg_query(self::$db, $sql['commit']); - } - } - - //indicate success. - return true; } /** * log msg queue holds messages intended for email and text logs. * - * @access private - * @param string $msg message to write to log file + * @param string $msg Message to write to log file */ private function log_it($msg) { - - if (!empty($msg)) { - self::$log_msg_queue .= date('m/d/y H:i:s : ', time()) . $msg . PHP_EOL; - } - } -} //END class submitty_student_auto_feed - - -/** @static class to parse command line arguments */ -class cli_args { - - /** @static @property array holds all CLI argument flags and their values */ - private static $args = array(); - /** @static @property string usage help message */ - private static $help_usage = "Usage: submitty_student_auto_feed.php [-h | --help] [-a auth str] (-t term code)" . 
PHP_EOL; - /** @static @property string short description help message */ - private static $help_short_desc = "Read student enrollment CSV and upsert to Submitty database." . PHP_EOL; - /** @static @property string argument list help message */ - private static $help_args_list = <<log_msg_queue .= date('m/d/y H:i:s : ', time()) . $msg . PHP_EOL; } -} //END class cli_args +} -/* EOF ====================================================================== */ +// EOF ?>