Keras ImageDataGenerator flow_from_dataframe returning KeyError











up vote
0
down vote

favorite












I am trying to build an image classifier with keras and the size of my dataset requires me to use the ImageDataGenerator class along with its flow_from_dataframe method. This is the code I am using.



train_datagen = keras.preprocessing.image.ImageDataGenerator(
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
train_generator = train_datagen.flow_from_dataframe(
directory='stage_1_train_images/',
dataframe=box.drop(labels=['patientId'], axis=1).replace(to_replace=float('nan'),value=0),
target_size=(1024, 1024))
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D((2, 2),padding='same'))
model.add(Conv2D(64, (3, 3), activation='linear',padding='same'))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
model.add(Conv2D(128, (3, 3), activation='linear',padding='same'))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
model.add(Flatten())
model.add(Dense(128, activation='linear'))
model.add(LeakyReLU(alpha=0.1))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
optimizer=keras.optimizers.Adam(lr=1000,decay=.99),
metrics=['accuracy'])
model.fit_generator(trainGen, steps_per_epoch=1024/16, epochs=317)


However, when I run this code, I get the following error:



KeyError                                  Traceback (most recent call last)
<ipython-input-7-5a88afda8de5> in <module>
7 directory='stage_1_train_images/',
8 dataframe=box.drop(labels=['patientId'], axis=1).replace(to_replace=float('nan'),value=0),
----> 9 target_size=(1024, 1024))
10 model = Sequential()
11 model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))

/opt/conda/lib/python3.6/site-packages/keras_preprocessing/image.py in flow_from_dataframe(self, dataframe, directory, x_col, y_col, has_ext, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, save_to_dir, save_prefix, save_format, subset, interpolation)
1105 save_format=save_format,
1106 subset=subset,
-> 1107 interpolation=interpolation)
1108
1109 def standardize(self, x):

/opt/conda/lib/python3.6/site-packages/keras_preprocessing/image.py in __init__(self, dataframe, directory, image_data_generator, x_col, y_col, has_ext, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, data_format, save_to_dir, save_prefix, save_format, follow_links, subset, interpolation, dtype)
2056 raise ValueError("has_ext must be either True if filenames in"
2057 " x_col has extensions,else False.")
-> 2058 self.df = dataframe.drop_duplicates(x_col)
2059 self.df[x_col] = self.df[x_col].astype(str)
2060 self.directory = directory

/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in drop_duplicates(self, subset, keep, inplace)
4329 """
4330 inplace = validate_bool_kwarg(inplace, 'inplace')
-> 4331 duplicated = self.duplicated(subset, keep=keep)
4332
4333 if inplace:

/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in duplicated(self, subset, keep)
4379 diff = Index(subset).difference(self.columns)
4380 if not diff.empty:
-> 4381 raise KeyError(diff)
4382
4383 vals = (col.values for name, col in self.iteritems()

KeyError: Index(['filename'], dtype='object')


What is going wrong? I have tried multiple things to fix this but cannot figure out why this is happening.










share|improve this question




























    up vote
    0
    down vote

    favorite












    I am trying to build an image classifier with keras and the size of my dataset requires me to use the ImageDataGenerator class along with its flow_from_dataframe method. This is the code I am using.



    train_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)
    train_generator = train_datagen.flow_from_dataframe(
    directory='stage_1_train_images/',
    dataframe=box.drop(labels=['patientId'], axis=1).replace(to_replace=float('nan'),value=0),
    target_size=(1024, 1024))
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))
    model.add(LeakyReLU(alpha=0.1))
    model.add(MaxPooling2D((2, 2),padding='same'))
    model.add(Conv2D(64, (3, 3), activation='linear',padding='same'))
    model.add(LeakyReLU(alpha=0.1))
    model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
    model.add(Conv2D(128, (3, 3), activation='linear',padding='same'))
    model.add(LeakyReLU(alpha=0.1))
    model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
    model.add(Flatten())
    model.add(Dense(128, activation='linear'))
    model.add(LeakyReLU(alpha=0.1))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adam(lr=1000,decay=.99),
    metrics=['accuracy'])
    model.fit_generator(trainGen, steps_per_epoch=1024/16, epochs=317)


    However, when I run this code, I get the following error:



    KeyError                                  Traceback (most recent call last)
    <ipython-input-7-5a88afda8de5> in <module>
    7 directory='stage_1_train_images/',
    8 dataframe=box.drop(labels=['patientId'], axis=1).replace(to_replace=float('nan'),value=0),
    ----> 9 target_size=(1024, 1024))
    10 model = Sequential()
    11 model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))

    /opt/conda/lib/python3.6/site-packages/keras_preprocessing/image.py in flow_from_dataframe(self, dataframe, directory, x_col, y_col, has_ext, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, save_to_dir, save_prefix, save_format, subset, interpolation)
    1105 save_format=save_format,
    1106 subset=subset,
    -> 1107 interpolation=interpolation)
    1108
    1109 def standardize(self, x):

    /opt/conda/lib/python3.6/site-packages/keras_preprocessing/image.py in __init__(self, dataframe, directory, image_data_generator, x_col, y_col, has_ext, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, data_format, save_to_dir, save_prefix, save_format, follow_links, subset, interpolation, dtype)
    2056 raise ValueError("has_ext must be either True if filenames in"
    2057 " x_col has extensions,else False.")
    -> 2058 self.df = dataframe.drop_duplicates(x_col)
    2059 self.df[x_col] = self.df[x_col].astype(str)
    2060 self.directory = directory

    /opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in drop_duplicates(self, subset, keep, inplace)
    4329 """
    4330 inplace = validate_bool_kwarg(inplace, 'inplace')
    -> 4331 duplicated = self.duplicated(subset, keep=keep)
    4332
    4333 if inplace:

    /opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in duplicated(self, subset, keep)
    4379 diff = Index(subset).difference(self.columns)
    4380 if not diff.empty:
    -> 4381 raise KeyError(diff)
    4382
    4383 vals = (col.values for name, col in self.iteritems()

    KeyError: Index(['filename'], dtype='object')


    What is going wrong? I have tried multiple things to fix this but cannot figure out why this is happening.










    share|improve this question


























      up vote
      0
      down vote

      favorite









      up vote
      0
      down vote

      favorite











      I am trying to build an image classifier with keras and the size of my dataset requires me to use the ImageDataGenerator class along with its flow_from_dataframe method. This is the code I am using.



      train_datagen = keras.preprocessing.image.ImageDataGenerator(
      rescale=1./255,
      shear_range=0.2,
      zoom_range=0.2,
      horizontal_flip=True)
      train_generator = train_datagen.flow_from_dataframe(
      directory='stage_1_train_images/',
      dataframe=box.drop(labels=['patientId'], axis=1).replace(to_replace=float('nan'),value=0),
      target_size=(1024, 1024))
      model = Sequential()
      model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))
      model.add(LeakyReLU(alpha=0.1))
      model.add(MaxPooling2D((2, 2),padding='same'))
      model.add(Conv2D(64, (3, 3), activation='linear',padding='same'))
      model.add(LeakyReLU(alpha=0.1))
      model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
      model.add(Conv2D(128, (3, 3), activation='linear',padding='same'))
      model.add(LeakyReLU(alpha=0.1))
      model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
      model.add(Flatten())
      model.add(Dense(128, activation='linear'))
      model.add(LeakyReLU(alpha=0.1))
      model.add(Dense(num_classes, activation='softmax'))
      model.compile(loss=keras.losses.categorical_crossentropy,
      optimizer=keras.optimizers.Adam(lr=1000,decay=.99),
      metrics=['accuracy'])
      model.fit_generator(trainGen, steps_per_epoch=1024/16, epochs=317)


      However, when I run this code, I get the following error:



      KeyError                                  Traceback (most recent call last)
      <ipython-input-7-5a88afda8de5> in <module>
      7 directory='stage_1_train_images/',
      8 dataframe=box.drop(labels=['patientId'], axis=1).replace(to_replace=float('nan'),value=0),
      ----> 9 target_size=(1024, 1024))
      10 model = Sequential()
      11 model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))

      /opt/conda/lib/python3.6/site-packages/keras_preprocessing/image.py in flow_from_dataframe(self, dataframe, directory, x_col, y_col, has_ext, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, save_to_dir, save_prefix, save_format, subset, interpolation)
      1105 save_format=save_format,
      1106 subset=subset,
      -> 1107 interpolation=interpolation)
      1108
      1109 def standardize(self, x):

      /opt/conda/lib/python3.6/site-packages/keras_preprocessing/image.py in __init__(self, dataframe, directory, image_data_generator, x_col, y_col, has_ext, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, data_format, save_to_dir, save_prefix, save_format, follow_links, subset, interpolation, dtype)
      2056 raise ValueError("has_ext must be either True if filenames in"
      2057 " x_col has extensions,else False.")
      -> 2058 self.df = dataframe.drop_duplicates(x_col)
      2059 self.df[x_col] = self.df[x_col].astype(str)
      2060 self.directory = directory

      /opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in drop_duplicates(self, subset, keep, inplace)
      4329 """
      4330 inplace = validate_bool_kwarg(inplace, 'inplace')
      -> 4331 duplicated = self.duplicated(subset, keep=keep)
      4332
      4333 if inplace:

      /opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in duplicated(self, subset, keep)
      4379 diff = Index(subset).difference(self.columns)
      4380 if not diff.empty:
      -> 4381 raise KeyError(diff)
      4382
      4383 vals = (col.values for name, col in self.iteritems()

      KeyError: Index(['filename'], dtype='object')


      What is going wrong? I have tried multiple things to fix this but cannot figure out why this is happening.










      share|improve this question















      I am trying to build an image classifier with keras and the size of my dataset requires me to use the ImageDataGenerator class along with its flow_from_dataframe method. This is the code I am using.



      train_datagen = keras.preprocessing.image.ImageDataGenerator(
      rescale=1./255,
      shear_range=0.2,
      zoom_range=0.2,
      horizontal_flip=True)
      train_generator = train_datagen.flow_from_dataframe(
      directory='stage_1_train_images/',
      dataframe=box.drop(labels=['patientId'], axis=1).replace(to_replace=float('nan'),value=0),
      target_size=(1024, 1024))
      model = Sequential()
      model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))
      model.add(LeakyReLU(alpha=0.1))
      model.add(MaxPooling2D((2, 2),padding='same'))
      model.add(Conv2D(64, (3, 3), activation='linear',padding='same'))
      model.add(LeakyReLU(alpha=0.1))
      model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
      model.add(Conv2D(128, (3, 3), activation='linear',padding='same'))
      model.add(LeakyReLU(alpha=0.1))
      model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
      model.add(Flatten())
      model.add(Dense(128, activation='linear'))
      model.add(LeakyReLU(alpha=0.1))
      model.add(Dense(num_classes, activation='softmax'))
      model.compile(loss=keras.losses.categorical_crossentropy,
      optimizer=keras.optimizers.Adam(lr=1000,decay=.99),
      metrics=['accuracy'])
      model.fit_generator(trainGen, steps_per_epoch=1024/16, epochs=317)


      However, when I run this code, I get the following error:



      KeyError                                  Traceback (most recent call last)
      <ipython-input-7-5a88afda8de5> in <module>
      7 directory='stage_1_train_images/',
      8 dataframe=box.drop(labels=['patientId'], axis=1).replace(to_replace=float('nan'),value=0),
      ----> 9 target_size=(1024, 1024))
      10 model = Sequential()
      11 model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))

      /opt/conda/lib/python3.6/site-packages/keras_preprocessing/image.py in flow_from_dataframe(self, dataframe, directory, x_col, y_col, has_ext, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, save_to_dir, save_prefix, save_format, subset, interpolation)
      1105 save_format=save_format,
      1106 subset=subset,
      -> 1107 interpolation=interpolation)
      1108
      1109 def standardize(self, x):

      /opt/conda/lib/python3.6/site-packages/keras_preprocessing/image.py in __init__(self, dataframe, directory, image_data_generator, x_col, y_col, has_ext, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, data_format, save_to_dir, save_prefix, save_format, follow_links, subset, interpolation, dtype)
      2056 raise ValueError("has_ext must be either True if filenames in"
      2057 " x_col has extensions,else False.")
      -> 2058 self.df = dataframe.drop_duplicates(x_col)
      2059 self.df[x_col] = self.df[x_col].astype(str)
      2060 self.directory = directory

      /opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in drop_duplicates(self, subset, keep, inplace)
      4329 """
      4330 inplace = validate_bool_kwarg(inplace, 'inplace')
      -> 4331 duplicated = self.duplicated(subset, keep=keep)
      4332
      4333 if inplace:

      /opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in duplicated(self, subset, keep)
      4379 diff = Index(subset).difference(self.columns)
      4380 if not diff.empty:
      -> 4381 raise KeyError(diff)
      4382
      4383 vals = (col.values for name, col in self.iteritems()

      KeyError: Index(['filename'], dtype='object')


      What is going wrong? I have tried multiple things to fix this but cannot figure out why this is happening.







      python image-processing machine-learning keras






      share|improve this question















      share|improve this question













      share|improve this question




      share|improve this question








      edited Oct 18 at 21:45

























      asked Oct 16 at 1:56









      Dhruv Chanana

      12




      12
























          1 Answer
          1






          active

          oldest

          votes

















          up vote
          0
          down vote













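          As per the Keras documentation for ImageDataGenerator.flow_from_dataframe, you need to pass x_col and y_col whenever your DataFrame does not use the default column names. The defaults for x_col and y_col are 'filename' and 'class', respectively. The traceback fails in dataframe.drop_duplicates(x_col) with KeyError: Index(['filename'], dtype='object'), which means your DataFrame has no column named "filename"; that is what causes the KeyError. To fix this, specify the following two arguments in the flow_from_dataframe call, for example x_col='<column holding the image filenames>' and y_col='<column(s) holding the targets>'.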




          x_col: string, column in the dataframe that contains
          the filenames of the target images.



          y_col: string or list of strings, columns in
          the dataframe that will be the target data.







          share|improve this answer





















            Your Answer






            StackExchange.ifUsing("editor", function () {
            StackExchange.using("externalEditor", function () {
            StackExchange.using("snippets", function () {
            StackExchange.snippets.init();
            });
            });
            }, "code-snippets");

            StackExchange.ready(function() {
            var channelOptions = {
            tags: "".split(" "),
            id: "1"
            };
            initTagRenderer("".split(" "), "".split(" "), channelOptions);

            StackExchange.using("externalEditor", function() {
            // Have to fire editor after snippets, if snippets enabled
            if (StackExchange.settings.snippets.snippetsEnabled) {
            StackExchange.using("snippets", function() {
            createEditor();
            });
            }
            else {
            createEditor();
            }
            });

            function createEditor() {
            StackExchange.prepareEditor({
            heartbeatType: 'answer',
            convertImagesToLinks: true,
            noModals: true,
            showLowRepImageUploadWarning: true,
            reputationToPostImages: 10,
            bindNavPrevention: true,
            postfix: "",
            imageUploader: {
            brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
            contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
            allowUrls: true
            },
            onDemand: true,
            discardSelector: ".discard-answer"
            ,immediatelyShowMarkdownHelp:true
            });


            }
            });














             

            draft saved


            draft discarded


















            StackExchange.ready(
            function () {
            StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f52826906%2fkeras-imagedatagenerator-flow-from-dataframe-returning-keyerror%23new-answer', 'question_page');
            }
            );

            Post as a guest















            Required, but never shown

























            1 Answer
            1






            active

            oldest

            votes








            1 Answer
            1






            active

            oldest

            votes









            active

            oldest

            votes






            active

            oldest

            votes








            up vote
            0
            down vote













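            As per the Keras documentation for ImageDataGenerator.flow_from_dataframe, you need to pass x_col and y_col whenever your DataFrame does not use the default column names. The defaults for x_col and y_col are 'filename' and 'class', respectively. The traceback fails in dataframe.drop_duplicates(x_col) with KeyError: Index(['filename'], dtype='object'), which means your DataFrame has no column named "filename"; that is what causes the KeyError. To fix this, specify the following two arguments in the flow_from_dataframe call, for example x_col='<column holding the image filenames>' and y_col='<column(s) holding the targets>'.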




            x_col: string, column in the dataframe that contains
            the filenames of the target images.



            y_col: string or list of strings, columns in
            the dataframe that will be the target data.







            share|improve this answer

























              up vote
              0
              down vote













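              As per the Keras documentation for ImageDataGenerator.flow_from_dataframe, you need to pass x_col and y_col whenever your DataFrame does not use the default column names. The defaults for x_col and y_col are 'filename' and 'class', respectively. The traceback fails in dataframe.drop_duplicates(x_col) with KeyError: Index(['filename'], dtype='object'), which means your DataFrame has no column named "filename"; that is what causes the KeyError. To fix this, specify the following two arguments in the flow_from_dataframe call, for example x_col='<column holding the image filenames>' and y_col='<column(s) holding the targets>'.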




              x_col: string, column in the dataframe that contains
              the filenames of the target images.



              y_col: string or list of strings, columns in
              the dataframe that will be the target data.







              share|improve this answer























                up vote
                0
                down vote










                up vote
                0
                down vote









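                As per the Keras documentation for ImageDataGenerator.flow_from_dataframe, you need to pass x_col and y_col whenever your DataFrame does not use the default column names. The defaults for x_col and y_col are 'filename' and 'class', respectively. The traceback fails in dataframe.drop_duplicates(x_col) with KeyError: Index(['filename'], dtype='object'), which means your DataFrame has no column named "filename"; that is what causes the KeyError. To fix this, specify the following two arguments in the flow_from_dataframe call, for example x_col='<column holding the image filenames>' and y_col='<column(s) holding the targets>'.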




                x_col: string, column in the dataframe that contains
                the filenames of the target images.



                y_col: string or list of strings, columns in
                the dataframe that will be the target data.







                share|improve this answer












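                As per the Keras documentation for ImageDataGenerator.flow_from_dataframe, you need to pass x_col and y_col whenever your DataFrame does not use the default column names. The defaults for x_col and y_col are 'filename' and 'class', respectively. The traceback fails in dataframe.drop_duplicates(x_col) with KeyError: Index(['filename'], dtype='object'), which means your DataFrame has no column named "filename"; that is what causes the KeyError. To fix this, specify the following two arguments in the flow_from_dataframe call, for example x_col='<column holding the image filenames>' and y_col='<column(s) holding the targets>'.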




                x_col: string, column in the dataframe that contains
                the filenames of the target images.



                y_col: string or list of strings, columns in
                the dataframe that will be the target data.








                share|improve this answer












                share|improve this answer



                share|improve this answer










                answered Nov 21 at 18:54









                UserAnon

                114




                114






























                     

                    draft saved


                    draft discarded



















































                     


                    draft saved


                    draft discarded














                    StackExchange.ready(
                    function () {
                    StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f52826906%2fkeras-imagedatagenerator-flow-from-dataframe-returning-keyerror%23new-answer', 'question_page');
                    }
                    );

                    Post as a guest















                    Required, but never shown





















































                    Required, but never shown














                    Required, but never shown












                    Required, but never shown







                    Required, but never shown

































                    Required, but never shown














                    Required, but never shown












                    Required, but never shown







                    Required, but never shown







                    Popular posts from this blog

                    How to ignore python UserWarning in pytest?

                    What visual should I use to simply compare current year value vs last year in Power BI desktop

                    Script to remove string up to first number