@@ -19,7 +19,7 @@ class Remediation(NamedTuple):
19
19
error_msg : Optional [str ] = None
20
20
21
21
22
- OptionalDataFrameT = TypeVar ("OptionalDataFrameT" , bound = "Optional[pd.Dataframe ]" )
22
+ OptionalDataFrameT = TypeVar ("OptionalDataFrameT" , bound = "Optional[pd.DataFrame ]" )
23
23
24
24
25
25
def num_examples_validator (df : pd .DataFrame ) -> Remediation :
@@ -490,28 +490,28 @@ def read_any_format(
490
490
dtype = str ,
491
491
).fillna ("" )
492
492
elif fname .lower ().endswith (".jsonl" ):
493
- df = pd .read_json (fname , lines = True , dtype = str ).fillna ("" )
493
+ df = pd .read_json (fname , lines = True , dtype = str ).fillna ("" ) # type: ignore
494
494
if len (df ) == 1 : # type: ignore
495
495
# this is NOT what we expect for a .jsonl file
496
496
immediate_msg = "\n - Your JSONL file appears to be in a JSON format. Your file will be converted to JSONL format"
497
497
necessary_msg = "Your format `JSON` will be converted to `JSONL`"
498
- df = pd .read_json (fname , dtype = str ).fillna ("" )
498
+ df = pd .read_json (fname , dtype = str ).fillna ("" ) # type: ignore
499
499
else :
500
500
pass # this is what we expect for a .jsonl file
501
501
elif fname .lower ().endswith (".json" ):
502
502
try :
503
503
# to handle case where .json file is actually a .jsonl file
504
- df = pd .read_json (fname , lines = True , dtype = str ).fillna ("" )
504
+ df = pd .read_json (fname , lines = True , dtype = str ).fillna ("" ) # type: ignore
505
505
if len (df ) == 1 : # type: ignore
506
506
# this code path corresponds to a .json file that has one line
507
- df = pd .read_json (fname , dtype = str ).fillna ("" )
507
+ df = pd .read_json (fname , dtype = str ).fillna ("" ) # type: ignore
508
508
else :
509
509
# this is NOT what we expect for a .json file
510
510
immediate_msg = "\n - Your JSON file appears to be in a JSONL format. Your file will be converted to JSONL format"
511
511
necessary_msg = "Your format `JSON` will be converted to `JSONL`"
512
512
except ValueError :
513
513
# this code path corresponds to a .json file that has multiple lines (i.e. it is indented)
514
- df = pd .read_json (fname , dtype = str ).fillna ("" )
514
+ df = pd .read_json (fname , dtype = str ).fillna ("" ) # type: ignore
515
515
else :
516
516
error_msg = (
517
517
"Your file must have one of the following extensions: .CSV, .TSV, .XLSX, .TXT, .JSON or .JSONL"
0 commit comments