From 5b43e1b50194b3f7dd228a0a8f60a835852fbd25 Mon Sep 17 00:00:00 2001 From: Josh Friedlander <16547083+lordgrenville@users.noreply.github.com> Date: Wed, 3 Nov 2021 15:42:09 +0200 Subject: [PATCH 1/3] Detect CPORT header in SAS files --- pandas/io/sas/sas_xport.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index bdb7d86a9b37e..8f8c02a6c16ab 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -279,6 +279,10 @@ def _read_header(self): # read file header line1 = self._get_row() if line1 != _correct_line1: + if "**COMPRESSED**" in line1: + raise ValueError( + "Header record indicates a CPORT file, which is not readable." + ) raise ValueError("Header record is not an XPORT file.") line2 = self._get_row() From 8485358c3cd686be7a1c86f39699329c75cbf687 Mon Sep 17 00:00:00 2001 From: Josh Friedlander <16547083+lordgrenville@users.noreply.github.com> Date: Thu, 4 Nov 2021 14:08:29 +0200 Subject: [PATCH 2/3] add unit test for SAS CPORT check --- pandas/tests/io/sas/data/DEMO_PUF.cpt | Bin 0 -> 694 bytes pandas/tests/io/sas/test_xport.py | 9 +++++++++ 2 files changed, 9 insertions(+) create mode 100644 pandas/tests/io/sas/data/DEMO_PUF.cpt diff --git a/pandas/tests/io/sas/data/DEMO_PUF.cpt b/pandas/tests/io/sas/data/DEMO_PUF.cpt new file mode 100644 index 0000000000000000000000000000000000000000..d74b6a70d281240e842f2ed568887e60dfac35b6 GIT binary patch literal 694 zcmbWxyH3L}6b9hB&=NrhM%X+d6bTfkX{wGPNohoGvU{OZUw{#b6_s9qp&QH~QJ&%- zc!E9?2{u)bGGW;|`laLV99?&lDCNEozNhO{|9{kfP8eKJH;MZ^3F)i9@^DO`m0G5>%zS2MnFWjFR?caX69Yc<$72psmEK3mZcKVid93Ii zDsNAEL!XNviL?E7q_-~dZ0Xs`v$x(o$dU3I(sR6M(wjuSC;AS^t~B3D%$77;%4`)) ds?39;dD% Date: Fri, 5 Nov 2021 16:00:47 +0200 Subject: [PATCH 3/3] add reference to documentation --- pandas/io/sas/sas_xport.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index 8f8c02a6c16ab..3f9bf6662e99f 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -280,6 +280,8 @@ def _read_header(self): line1 = self._get_row() if line1 != _correct_line1: if "**COMPRESSED**" in line1: + # this was created with the PROC CPORT method and can't be read + # https://documentation.sas.com/doc/en/pgmsascdc/9.4_3.5/movefile/p1bm6aqp3fw4uin1hucwh718f6kp.htm raise ValueError( "Header record indicates a CPORT file, which is not readable." )