@@ -544,97 +544,68 @@ class FeatherReadable : public IOReadableInterface {
544544 new SizedRandomAccessFile (env_, filename, memory_data, memory_size));
545545 TF_RETURN_IF_ERROR (file_->GetFileSize (&file_size_));
546546
547- // FEA1.....[metadata][uint32 metadata_length]FEA1
548- static constexpr const char * kFeatherMagicBytes = " FEA1" ;
549-
550- size_t header_length = strlen (kFeatherMagicBytes );
551- size_t footer_length = sizeof (uint32) + strlen (kFeatherMagicBytes );
552-
553- string buffer;
554- buffer.resize (header_length > footer_length ? header_length
555- : footer_length);
556-
557- StringPiece result;
558-
559- TF_RETURN_IF_ERROR (file_->Read (0 , header_length, &result, &buffer[0 ]));
560- if (memcmp (buffer.data (), kFeatherMagicBytes , header_length) != 0 ) {
561- return errors::InvalidArgument (" not a feather file" );
547+ std::shared_ptr<ArrowRandomAccessFile> feather_file;
548+ feather_file.reset (new ArrowRandomAccessFile (file_.get (), file_size_));
549+ auto maybe_reader = arrow::ipc::feather::Reader::Open (feather_file);
550+ if (!maybe_reader.ok ()) {
551+ return errors::Internal (maybe_reader.status ().ToString ());
562552 }
553+ std::shared_ptr<arrow::ipc::feather::Reader> reader =
554+ maybe_reader.ValueOrDie ();
555+ std::shared_ptr<arrow::Schema> schema = reader->schema ();
563556
564- TF_RETURN_IF_ERROR (file_->Read (file_size_ - footer_length, footer_length,
565- &result, &buffer[0 ]));
566- if (memcmp (buffer.data () + sizeof (uint32), kFeatherMagicBytes ,
567- footer_length - sizeof (uint32)) != 0 ) {
568- return errors::InvalidArgument (" incomplete feather file" );
569- }
570-
571- uint32 metadata_length = *reinterpret_cast <const uint32*>(buffer.data ());
572-
573- buffer.resize (metadata_length);
574-
575- TF_RETURN_IF_ERROR (file_->Read (file_size_ - footer_length - metadata_length,
576- metadata_length, &result, &buffer[0 ]));
577-
578- const ::arrow::ipc::feather::fbs::CTable* table =
579- ::arrow::ipc::feather::fbs::GetCTable (buffer.data());
580-
581- if (table->version () < ::arrow::ipc::feather::kFeatherV1Version ) {
582- return errors::InvalidArgument (" feather file is old: " , table->version (),
583- " vs. " ,
584- ::arrow::ipc::feather::kFeatherV1Version );
557+ std::shared_ptr<arrow::Table> table;
558+ arrow::Status s = reader->Read (&table);
559+ if (!s.ok ()) {
560+ return errors::Internal (s.ToString ());
585561 }
586562
587- for (size_t i = 0 ; i < table-> columns ()-> size (); i++) {
563+ for (int i = 0 ; i < schema-> num_fields (); i++) {
588564 ::tensorflow::DataType dtype = ::tensorflow::DataType::DT_INVALID;
589- switch (table-> columns ()-> Get ( i)->values ()->type ()) {
590- case ::arrow::ipc::feather::fbs:: Type::BOOL:
565+ switch (schema-> field ( i)->type ()->id ()) {
566+ case ::arrow::Type::BOOL:
591567 dtype = ::tensorflow::DataType::DT_BOOL;
592568 break ;
593- case ::arrow::ipc::feather::fbs:: Type::INT8:
569+ case ::arrow::Type::INT8:
594570 dtype = ::tensorflow::DataType::DT_INT8;
595571 break ;
596- case ::arrow::ipc::feather::fbs:: Type::INT16:
572+ case ::arrow::Type::INT16:
597573 dtype = ::tensorflow::DataType::DT_INT16;
598574 break ;
599- case ::arrow::ipc::feather::fbs:: Type::INT32:
575+ case ::arrow::Type::INT32:
600576 dtype = ::tensorflow::DataType::DT_INT32;
601577 break ;
602- case ::arrow::ipc::feather::fbs:: Type::INT64:
578+ case ::arrow::Type::INT64:
603579 dtype = ::tensorflow::DataType::DT_INT64;
604580 break ;
605- case ::arrow::ipc::feather::fbs:: Type::UINT8:
581+ case ::arrow::Type::UINT8:
606582 dtype = ::tensorflow::DataType::DT_UINT8;
607583 break ;
608- case ::arrow::ipc::feather::fbs:: Type::UINT16:
584+ case ::arrow::Type::UINT16:
609585 dtype = ::tensorflow::DataType::DT_UINT16;
610586 break ;
611- case ::arrow::ipc::feather::fbs:: Type::UINT32:
587+ case ::arrow::Type::UINT32:
612588 dtype = ::tensorflow::DataType::DT_UINT32;
613589 break ;
614- case ::arrow::ipc::feather::fbs:: Type::UINT64:
590+ case ::arrow::Type::UINT64:
615591 dtype = ::tensorflow::DataType::DT_UINT64;
616592 break ;
617- case ::arrow::ipc::feather::fbs:: Type::FLOAT:
593+ case ::arrow::Type::FLOAT:
618594 dtype = ::tensorflow::DataType::DT_FLOAT;
619595 break ;
620- case ::arrow::ipc::feather::fbs:: Type::DOUBLE:
596+ case ::arrow::Type::DOUBLE:
621597 dtype = ::tensorflow::DataType::DT_DOUBLE;
622598 break ;
623- case ::arrow::ipc::feather::fbs::Type::UTF8:
624- case ::arrow::ipc::feather::fbs::Type::BINARY:
625- case ::arrow::ipc::feather::fbs::Type::CATEGORY:
626- case ::arrow::ipc::feather::fbs::Type::TIMESTAMP:
627- case ::arrow::ipc::feather::fbs::Type::DATE:
628- case ::arrow::ipc::feather::fbs::Type::TIME:
629- // case ::arrow::ipc::feather::fbs::Type::LARGE_UTF8:
630- // case ::arrow::ipc::feather::fbs::Type::LARGE_BINARY:
599+ case ::arrow::Type::BINARY:
600+ dtype = ::tensorflow::DataType::DT_STRING;
601+ break ;
631602 default :
632603 break ;
633604 }
634605 shapes_.push_back (TensorShape ({static_cast <int64>(table->num_rows ())}));
635606 dtypes_.push_back (dtype);
636- columns_.push_back (table-> columns ()-> Get ( i)->name ()-> str ());
637- columns_index_[table-> columns ()-> Get ( i)->name ()-> str ()] = i;
607+ columns_.push_back (schema-> field ( i)->name ());
608+ columns_index_[schema-> field ( i)->name ()] = i;
638609 }
639610
640611 return Status::OK ();
@@ -751,6 +722,17 @@ class FeatherReadable : public IOReadableInterface {
751722 FEATHER_PROCESS_TYPE (double ,
752723 ::arrow::NumericArray<::arrow::DoubleType>);
753724 break ;
725+ case DT_STRING: {
726+ int64 curr_index = 0 ;
727+ for (auto chunk : slice->chunks ()) {
728+ for (int64_t item = 0 ; item < chunk->length (); item++) {
729+ value->flat <tstring>()(curr_index) =
730+ (dynamic_cast <::arrow::BinaryArray*>(chunk.get ()))
731+ ->GetString (item);
732+ curr_index++;
733+ }
734+ }
735+ } break ;
754736 default :
755737 return errors::InvalidArgument (" data type is not supported: " ,
756738 DataTypeString (value->dtype ()));
0 commit comments