imdb dataset.

Oceania2018 · Oceania2018 · commit 93fd34b225cb · 2020-12-26T09:33:23.000-06:00
diff --git a/TensorFlow.NET.sln b/TensorFlow.NET.sln
@@ -13,7 +13,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tensorflow.Console", "src\T
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tensorflow.Keras", "src\TensorFlowNET.Keras\Tensorflow.Keras.csproj", "{49D71826-C03D-4FA7-9BAC-22C1327E65CF}"
 EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Tensorflow.Text", "src\TensorFlowNET.Text\Tensorflow.Text.csproj", "{1AB8108D-4FFE-4A16-88E7-328EAF686370}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tensorflow.Text", "src\TensorFlowNET.Text\Tensorflow.Text.csproj", "{1AB8108D-4FFE-4A16-88E7-328EAF686370}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tensorflow.Recommenders", "src\TensorFlowNET.Recommenders\Tensorflow.Recommenders.csproj", "{F17AAECB-960A-4E18-A270-BAD776F0E55B}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -175,6 +177,30 @@ Global
 		{1AB8108D-4FFE-4A16-88E7-328EAF686370}.Release|x64.Build.0 = Release|Any CPU
 		{1AB8108D-4FFE-4A16-88E7-328EAF686370}.Release|x86.ActiveCfg = Release|Any CPU
 		{1AB8108D-4FFE-4A16-88E7-328EAF686370}.Release|x86.Build.0 = Release|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Debug|x64.ActiveCfg = Debug|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Debug|x64.Build.0 = Debug|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Debug|x86.Build.0 = Debug|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Debug-Minimal|Any CPU.ActiveCfg = Debug|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Debug-Minimal|Any CPU.Build.0 = Debug|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Debug-Minimal|x64.ActiveCfg = Debug|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Debug-Minimal|x64.Build.0 = Debug|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Debug-Minimal|x86.ActiveCfg = Debug|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Debug-Minimal|x86.Build.0 = Debug|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Publish|Any CPU.ActiveCfg = Debug|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Publish|Any CPU.Build.0 = Debug|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Publish|x64.ActiveCfg = Debug|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Publish|x64.Build.0 = Debug|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Publish|x86.ActiveCfg = Debug|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Publish|x86.Build.0 = Debug|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Release|Any CPU.Build.0 = Release|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Release|x64.ActiveCfg = Release|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Release|x64.Build.0 = Release|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Release|x86.ActiveCfg = Release|Any CPU
+		{F17AAECB-960A-4E18-A270-BAD776F0E55B}.Release|x86.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
diff --git a/src/TensorFlowNET.Console/Tensorflow.Console.csproj b/src/TensorFlowNET.Console/Tensorflow.Console.csproj
@@ -12,7 +12,7 @@
   </ItemGroup>
 
   <ItemGroup>
-    <ProjectReference Include="..\TensorFlowNET.Keras\Tensorflow.Keras.csproj" />
+    <ProjectReference Include="..\TensorFlowNET.Recommenders\Tensorflow.Recommenders.csproj" />
     <ProjectReference Include="..\TensorFlowNET.Text\Tensorflow.Text.csproj" />
   </ItemGroup>
 
diff --git a/src/TensorFlowNET.Keras/Datasets/Imdb.cs b/src/TensorFlowNET.Keras/Datasets/Imdb.cs
@@ -0,0 +1,97 @@
+﻿using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+using Tensorflow.Keras.Utils;
+using NumSharp;
+using System.Linq;
+
+namespace Tensorflow.Keras.Datasets
+{
+    /// <summary>
+    /// This is a dataset of 25,000 movie reviews from IMDB, labeled by sentiment
+    /// (positive/negative). Reviews have been preprocessed, and each review is
+    /// encoded as a list of word indexes (integers).
+    /// </summary>
+    public class Imdb
+    {
+        string origin_folder = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/";
+        string file_name = "imdb.npz";
+        string dest_folder = "imdb";
+
+        /// <summary>
+        /// Loads the [IMDB dataset](https://ai.stanford.edu/~amaas/data/sentiment/) from
+        /// "imdb_train.txt" and "imdb_test.txt" in the folder returned by Download().
+        /// NOTE(review): Download() requests imdb.npz; confirm what produces the .txt files.
+        /// </summary>
+        /// <param name="path">Currently unused.</param>
+        /// <param name="num_words">Currently unused.</param>
+        /// <param name="skip_top">Currently unused.</param>
+        /// <param name="maxlen">Currently unused.</param>
+        /// <param name="seed">Currently unused.</param>
+        /// <param name="start_char">Currently unused.</param>
+        /// <param name="oov_char">Currently unused.</param>
+        /// <param name="index_from">Currently unused.</param>
+        /// <returns>Train and test splits, each as a (reviews, labels) pair.</returns>
+        public DatasetPass load_data(string path = "imdb.npz",
+            int num_words = -1,
+            int skip_top = 0,
+            int maxlen = -1,
+            int seed = 113,
+            int start_char = 1,
+            int oov_char= 2,
+            int index_from = 3)
+        {
+            var dst = Download();
+            var (x_train, y_train) = LoadSplit(Path.Combine(dst, "imdb_train.txt"));
+            var (x_test, y_test) = LoadSplit(Path.Combine(dst, "imdb_test.txt"));
+
+            return new DatasetPass
+            {
+                Train = (x_train, y_train),
+                Test = (x_test, y_test)
+            };
+        }
+
+        /// <summary>
+        /// Parses one split file: column 0 of each line is the label digit and the review
+        /// text starts at column 2. Returns (reviews, labels) read from that file only.
+        /// </summary>
+        (NDArray, NDArray) LoadSplit(string file)
+        {
+            var lines = File.ReadAllLines(file);
+            var reviews = new string[lines.Length];
+            var labels = np.zeros(new int[] { lines.Length }, NPTypeCode.Int64);
+            for (int i = 0; i < lines.Length; i++)
+            {
+                labels[i] = long.Parse(lines[i].Substring(0, 1));
+                reviews[i] = lines[i].Substring(2);
+            }
+
+            return (np.array(reviews), labels);
+        }
+
+        /// <summary>Returns the "x_train.npy" and "x_test.npy" entries of the given .npz bytes.</summary>
+        (NDArray, NDArray) LoadX(byte[] bytes)
+        {
+            var archive = np.Load_Npz<byte[]>(bytes);
+            return (archive["x_train.npy"], archive["x_test.npy"]);
+        }
+
+        /// <summary>Returns the "y_train.npy" and "y_test.npy" entries of the given .npz bytes.</summary>
+        (NDArray, NDArray) LoadY(byte[] bytes)
+        {
+            var archive = np.Load_Npz<long[]>(bytes);
+            return (archive["y_train.npy"], archive["y_test.npy"]);
+        }
+
+        /// <summary>Creates the "imdb" folder under the temp path, requests the archive into it, and returns the folder.</summary>
+        string Download()
+        {
+            var dst = Path.Combine(Path.GetTempPath(), dest_folder);
+            Directory.CreateDirectory(dst);
+            Web.Download(origin_folder + file_name, dst, file_name);
+            return dst;
+        }
+    }
+}
diff --git a/src/TensorFlowNET.Keras/Datasets/KerasDataset.cs b/src/TensorFlowNET.Keras/Datasets/KerasDataset.cs
@@ -20,5 +20,6 @@ public class KerasDataset
     {
         public Mnist mnist { get; } = new Mnist();
         public Cifar10 cifar10 { get; } = new Cifar10();
+        public Imdb imdb { get; } = new Imdb();
     }
 }
diff --git a/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.image_dataset_from_directory.cs b/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.image_dataset_from_directory.cs
@@ -57,5 +57,20 @@ public IDatasetV2 image_dataset_from_directory(string directory,
             dataset = dataset.batch(batch_size);
             return dataset;
         }
+
+        public IDatasetV2 text_dataset_from_directory(string directory,
+            string labels = "inferred",
+            string label_mode = "int",
+            string[] class_names = null,
+            int batch_size = 32,
+            bool shuffle = true,
+            int? seed = null,
+            float validation_split = 0.2f,
+            string subset = null)
+        {
+            // Placeholder: no dataset is built yet and none of the arguments are read;
+            // every call currently returns null.
+            return null;
+        }
     }
 }
diff --git a/src/TensorFlowNET.Recommenders/Tensorflow.Recommenders.csproj b/src/TensorFlowNET.Recommenders/Tensorflow.Recommenders.csproj
@@ -0,0 +1,21 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFramework>netstandard2.0</TargetFramework>
+    <Version>0.0.1</Version>
+    <Description>TensorFlow Recommenders is a library for building recommender system models using TensorFlow.</Description>
+    <PackageLicenseFile>LICENSE</PackageLicenseFile>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <None Include="..\..\LICENSE">
+      <Pack>True</Pack>
+      <PackagePath></PackagePath>
+    </None>
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\TensorFlowNET.Keras\Tensorflow.Keras.csproj" />
+  </ItemGroup>
+
+</Project>
diff --git a/src/TensorFlowNET.Text/Tensorflow.Text.csproj b/src/TensorFlowNET.Text/Tensorflow.Text.csproj
@@ -6,14 +6,22 @@
     <AssemblyName>Tensorflow.Text</AssemblyName>
     <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
     <Version>0.0.1</Version>
+    <PackageLicenseFile>LICENSE</PackageLicenseFile>
   </PropertyGroup>
 
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
     <DefineConstants>DEBUG;TRACE</DefineConstants>
   </PropertyGroup>
 
   <ItemGroup>
-    <ProjectReference Include="..\TensorFlowNET.Core\Tensorflow.Binding.csproj" />
+    <None Include="..\..\LICENSE">
+      <Pack>True</Pack>
+      <PackagePath></PackagePath>
+    </None>
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\TensorFlowNET.Keras\Tensorflow.Keras.csproj" />
   </ItemGroup>
 
 </Project>
diff --git a/src/TensorFlowNet.Benchmarks/Tensorflow.Benchmark.csproj b/src/TensorFlowNet.Benchmarks/Tensorflow.Benchmark.csproj
@@ -29,12 +29,11 @@
 
   <ItemGroup>
     <PackageReference Include="BenchmarkDotNet" Version="0.12.1" />
-    <PackageReference Include="SciSharp.TensorFlow.Redist" Version="2.3.0" />
-    <PackageReference Include="TensorFlow.NET" Version="0.20.0" />
+    <PackageReference Include="SciSharp.TensorFlow.Redist" Version="2.3.1" />
   </ItemGroup>
 
   <ItemGroup>
-    <ProjectReference Include="..\TensorFlowNET.Core\Tensorflow.Binding.csproj" />
+    <ProjectReference Include="..\TensorFlowNET.Keras\Tensorflow.Keras.csproj" />
   </ItemGroup>
 
 </Project>
diff --git a/test/TensorFlowNET.UnitTest/Tensorflow.UnitTest.csproj b/test/TensorFlowNET.UnitTest/Tensorflow.UnitTest.csproj
@@ -55,7 +55,7 @@
   </ItemGroup>
 
   <ItemGroup>
-    <ProjectReference Include="..\..\src\TensorFlowNET.Keras\Tensorflow.Keras.csproj" />
+    <ProjectReference Include="..\..\src\TensorFlowNET.Recommenders\Tensorflow.Recommenders.csproj" />
     <ProjectReference Include="..\..\src\TensorFlowNET.Text\Tensorflow.Text.csproj" />
   </ItemGroup>
 

Original file line number	Diff line number	Diff line change
`@@ -20,5 +20,6 @@ public class KerasDataset`
`20`	`20`	`{`
`21`	`21`	`public Mnist mnist { get; } = new Mnist();`
`22`	`22`	`public Cifar10 cifar10 { get; } = new Cifar10();`
	`23`	`+ public Imdb imdb { get; } = new Imdb();`
`23`	`24`	`}`
`24`	`25`	`}`