Hi team, I am currently using Airbyte to read CSV files from an S3 bucket.
I am getting the error below when a file contains only a header row and no data rows.
Is there a configuration workaround that we can use to skip files that contain only headers?
Traceback (most recent call last):
File "/airbyte/integration_code/main.py", line 13, in <module>
launch(source, sys.argv[1:])
File "/usr/local/lib/python3.9/site-packages/airbyte_cdk/entrypoint.py", line 129, in launch
for message in source_entrypoint.run(parsed_args):
File "/usr/local/lib/python3.9/site-packages/airbyte_cdk/entrypoint.py", line 120, in run
for message in generator:
File "/usr/local/lib/python3.9/site-packages/airbyte_cdk/sources/abstract_source.py", line 123, in read
raise e
File "/usr/local/lib/python3.9/site-packages/airbyte_cdk/sources/abstract_source.py", line 114, in read
yield from self._read_stream(
File "/usr/local/lib/python3.9/site-packages/airbyte_cdk/sources/abstract_source.py", line 159, in _read_stream
for record in record_iterator:
File "/usr/local/lib/python3.9/site-packages/airbyte_cdk/sources/abstract_source.py", line 248, in _read_full_refresh
for record in records:
File "/airbyte/integration_code/source_s3/source_files_abstract/stream.py", line 452, in read_records
yield from super().read_records(sync_mode, cursor_field, stream_slice, stream_state)
File "/airbyte/integration_code/source_s3/source_files_abstract/stream.py", line 346, in read_records
file_reader = self.fileformatparser_class(self._format, self._get_master_schema())
File "/airbyte/integration_code/source_s3/source_files_abstract/stream.py", line 235, in _get_master_schema
raise RuntimeError(
RuntimeError: Detected mismatched datatype on column 'Count', in file 'test/Report.csv'. Should be 'integer', but found 'string'.