Keras ImageDataGenerator flow_from_dataframe returning KeyError
up vote
0
down vote
favorite
I am trying to build an image classifier with keras and the size of my dataset requires me to use the ImageDataGenerator class along with its flow_from_dataframe method. This is the code I am using.
train_datagen = keras.preprocessing.image.ImageDataGenerator(
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
train_generator = train_datagen.flow_from_dataframe(
directory='stage_1_train_images/',
dataframe=box.drop(labels=['patientId'], axis=1).replace(to_replace=float('nan'),value=0),
target_size=(1024, 1024))
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D((2, 2),padding='same'))
model.add(Conv2D(64, (3, 3), activation='linear',padding='same'))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
model.add(Conv2D(128, (3, 3), activation='linear',padding='same'))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
model.add(Flatten())
model.add(Dense(128, activation='linear'))
model.add(LeakyReLU(alpha=0.1))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
optimizer=keras.optimizers.Adam(lr=1000,decay=.99),
metrics=['accuracy'])
model.fit_generator(trainGen, steps_per_epoch=1024/16, epochs=317)
However, when I run this code, I get the following error
KeyError Traceback (most recent call last)
<ipython-input-7-5a88afda8de5> in <module>
7 directory='stage_1_train_images/',
8 dataframe=box.drop(labels=['patientId'], axis=1).replace(to_replace=float('nan'),value=0),
----> 9 target_size=(1024, 1024))
10 model = Sequential()
11 model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))
/opt/conda/lib/python3.6/site-packages/keras_preprocessing/image.py in flow_from_dataframe(self, dataframe, directory, x_col, y_col, has_ext, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, save_to_dir, save_prefix, save_format, subset, interpolation)
1105 save_format=save_format,
1106 subset=subset,
-> 1107 interpolation=interpolation)
1108
1109 def standardize(self, x):
/opt/conda/lib/python3.6/site-packages/keras_preprocessing/image.py in __init__(self, dataframe, directory, image_data_generator, x_col, y_col, has_ext, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, data_format, save_to_dir, save_prefix, save_format, follow_links, subset, interpolation, dtype)
2056 raise ValueError("has_ext must be either True if filenames in"
2057 " x_col has extensions,else False.")
-> 2058 self.df = dataframe.drop_duplicates(x_col)
2059 self.df[x_col] = self.df[x_col].astype(str)
2060 self.directory = directory
/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in drop_duplicates(self, subset, keep, inplace)
4329 """
4330 inplace = validate_bool_kwarg(inplace, 'inplace')
-> 4331 duplicated = self.duplicated(subset, keep=keep)
4332
4333 if inplace:
/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in duplicated(self, subset, keep)
4379 diff = Index(subset).difference(self.columns)
4380 if not diff.empty:
-> 4381 raise KeyError(diff)
4382
4383 vals = (col.values for name, col in self.iteritems()
KeyError: Index(['filename'], dtype='object')
What is going wrong? I have tried multiple things to fix this but cannot figure out why this is happening.
python image-processing machine-learning keras
add a comment |
up vote
0
down vote
favorite
I am trying to build an image classifier with keras and the size of my dataset requires me to use the ImageDataGenerator class along with its flow_from_dataframe method. This is the code I am using.
train_datagen = keras.preprocessing.image.ImageDataGenerator(
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
train_generator = train_datagen.flow_from_dataframe(
directory='stage_1_train_images/',
dataframe=box.drop(labels=['patientId'], axis=1).replace(to_replace=float('nan'),value=0),
target_size=(1024, 1024))
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D((2, 2),padding='same'))
model.add(Conv2D(64, (3, 3), activation='linear',padding='same'))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
model.add(Conv2D(128, (3, 3), activation='linear',padding='same'))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
model.add(Flatten())
model.add(Dense(128, activation='linear'))
model.add(LeakyReLU(alpha=0.1))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
optimizer=keras.optimizers.Adam(lr=1000,decay=.99),
metrics=['accuracy'])
model.fit_generator(trainGen, steps_per_epoch=1024/16, epochs=317)
However, when I run this code, I get the following error
KeyError Traceback (most recent call last)
<ipython-input-7-5a88afda8de5> in <module>
7 directory='stage_1_train_images/',
8 dataframe=box.drop(labels=['patientId'], axis=1).replace(to_replace=float('nan'),value=0),
----> 9 target_size=(1024, 1024))
10 model = Sequential()
11 model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))
/opt/conda/lib/python3.6/site-packages/keras_preprocessing/image.py in flow_from_dataframe(self, dataframe, directory, x_col, y_col, has_ext, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, save_to_dir, save_prefix, save_format, subset, interpolation)
1105 save_format=save_format,
1106 subset=subset,
-> 1107 interpolation=interpolation)
1108
1109 def standardize(self, x):
/opt/conda/lib/python3.6/site-packages/keras_preprocessing/image.py in __init__(self, dataframe, directory, image_data_generator, x_col, y_col, has_ext, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, data_format, save_to_dir, save_prefix, save_format, follow_links, subset, interpolation, dtype)
2056 raise ValueError("has_ext must be either True if filenames in"
2057 " x_col has extensions,else False.")
-> 2058 self.df = dataframe.drop_duplicates(x_col)
2059 self.df[x_col] = self.df[x_col].astype(str)
2060 self.directory = directory
/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in drop_duplicates(self, subset, keep, inplace)
4329 """
4330 inplace = validate_bool_kwarg(inplace, 'inplace')
-> 4331 duplicated = self.duplicated(subset, keep=keep)
4332
4333 if inplace:
/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in duplicated(self, subset, keep)
4379 diff = Index(subset).difference(self.columns)
4380 if not diff.empty:
-> 4381 raise KeyError(diff)
4382
4383 vals = (col.values for name, col in self.iteritems()
KeyError: Index(['filename'], dtype='object')
What is going wrong? I have tried multiple things to fix this but cannot figure out why this is happening.
python image-processing machine-learning keras
add a comment |
up vote
0
down vote
favorite
up vote
0
down vote
favorite
I am trying to build an image classifier with keras and the size of my dataset requires me to use the ImageDataGenerator class along with its flow_from_dataframe method. This is the code I am using.
train_datagen = keras.preprocessing.image.ImageDataGenerator(
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
train_generator = train_datagen.flow_from_dataframe(
directory='stage_1_train_images/',
dataframe=box.drop(labels=['patientId'], axis=1).replace(to_replace=float('nan'),value=0),
target_size=(1024, 1024))
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D((2, 2),padding='same'))
model.add(Conv2D(64, (3, 3), activation='linear',padding='same'))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
model.add(Conv2D(128, (3, 3), activation='linear',padding='same'))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
model.add(Flatten())
model.add(Dense(128, activation='linear'))
model.add(LeakyReLU(alpha=0.1))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
optimizer=keras.optimizers.Adam(lr=1000,decay=.99),
metrics=['accuracy'])
model.fit_generator(trainGen, steps_per_epoch=1024/16, epochs=317)
However, when I run this code, I get the following error
KeyError Traceback (most recent call last)
<ipython-input-7-5a88afda8de5> in <module>
7 directory='stage_1_train_images/',
8 dataframe=box.drop(labels=['patientId'], axis=1).replace(to_replace=float('nan'),value=0),
----> 9 target_size=(1024, 1024))
10 model = Sequential()
11 model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))
/opt/conda/lib/python3.6/site-packages/keras_preprocessing/image.py in flow_from_dataframe(self, dataframe, directory, x_col, y_col, has_ext, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, save_to_dir, save_prefix, save_format, subset, interpolation)
1105 save_format=save_format,
1106 subset=subset,
-> 1107 interpolation=interpolation)
1108
1109 def standardize(self, x):
/opt/conda/lib/python3.6/site-packages/keras_preprocessing/image.py in __init__(self, dataframe, directory, image_data_generator, x_col, y_col, has_ext, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, data_format, save_to_dir, save_prefix, save_format, follow_links, subset, interpolation, dtype)
2056 raise ValueError("has_ext must be either True if filenames in"
2057 " x_col has extensions,else False.")
-> 2058 self.df = dataframe.drop_duplicates(x_col)
2059 self.df[x_col] = self.df[x_col].astype(str)
2060 self.directory = directory
/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in drop_duplicates(self, subset, keep, inplace)
4329 """
4330 inplace = validate_bool_kwarg(inplace, 'inplace')
-> 4331 duplicated = self.duplicated(subset, keep=keep)
4332
4333 if inplace:
/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in duplicated(self, subset, keep)
4379 diff = Index(subset).difference(self.columns)
4380 if not diff.empty:
-> 4381 raise KeyError(diff)
4382
4383 vals = (col.values for name, col in self.iteritems()
KeyError: Index(['filename'], dtype='object')
What is going wrong? I have tried multiple things to fix this but cannot figure out why this is happening.
python image-processing machine-learning keras
I am trying to build an image classifier with keras and the size of my dataset requires me to use the ImageDataGenerator class along with its flow_from_dataframe method. This is the code I am using.
train_datagen = keras.preprocessing.image.ImageDataGenerator(
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
train_generator = train_datagen.flow_from_dataframe(
directory='stage_1_train_images/',
dataframe=box.drop(labels=['patientId'], axis=1).replace(to_replace=float('nan'),value=0),
target_size=(1024, 1024))
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D((2, 2),padding='same'))
model.add(Conv2D(64, (3, 3), activation='linear',padding='same'))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
model.add(Conv2D(128, (3, 3), activation='linear',padding='same'))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
model.add(Flatten())
model.add(Dense(128, activation='linear'))
model.add(LeakyReLU(alpha=0.1))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
optimizer=keras.optimizers.Adam(lr=1000,decay=.99),
metrics=['accuracy'])
model.fit_generator(trainGen, steps_per_epoch=1024/16, epochs=317)
However, when I run this code, I get the following error
KeyError Traceback (most recent call last)
<ipython-input-7-5a88afda8de5> in <module>
7 directory='stage_1_train_images/',
8 dataframe=box.drop(labels=['patientId'], axis=1).replace(to_replace=float('nan'),value=0),
----> 9 target_size=(1024, 1024))
10 model = Sequential()
11 model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))
/opt/conda/lib/python3.6/site-packages/keras_preprocessing/image.py in flow_from_dataframe(self, dataframe, directory, x_col, y_col, has_ext, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, save_to_dir, save_prefix, save_format, subset, interpolation)
1105 save_format=save_format,
1106 subset=subset,
-> 1107 interpolation=interpolation)
1108
1109 def standardize(self, x):
/opt/conda/lib/python3.6/site-packages/keras_preprocessing/image.py in __init__(self, dataframe, directory, image_data_generator, x_col, y_col, has_ext, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, data_format, save_to_dir, save_prefix, save_format, follow_links, subset, interpolation, dtype)
2056 raise ValueError("has_ext must be either True if filenames in"
2057 " x_col has extensions,else False.")
-> 2058 self.df = dataframe.drop_duplicates(x_col)
2059 self.df[x_col] = self.df[x_col].astype(str)
2060 self.directory = directory
/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in drop_duplicates(self, subset, keep, inplace)
4329 """
4330 inplace = validate_bool_kwarg(inplace, 'inplace')
-> 4331 duplicated = self.duplicated(subset, keep=keep)
4332
4333 if inplace:
/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in duplicated(self, subset, keep)
4379 diff = Index(subset).difference(self.columns)
4380 if not diff.empty:
-> 4381 raise KeyError(diff)
4382
4383 vals = (col.values for name, col in self.iteritems()
KeyError: Index(['filename'], dtype='object')
What is going wrong? I have tried multiple things to fix this but cannot figure out why this is happening.
python image-processing machine-learning keras
python image-processing machine-learning keras
edited Oct 18 at 21:45
asked Oct 16 at 1:56
Dhruv Chanana
12
12
add a comment |
add a comment |
1 Answer
1
active
oldest
votes
up vote
0
down vote
As per the documentation here, you need to specify x_col and y_col as arguments in the flow_from_dataframe method. The defaults for x_col and y_col are 'filename' and 'class', respectively. From the error, I'm guessing that you don't have a column named "filename" in your DataFrame, which is what causes the KeyError. To fix this, specify the following two arguments in the flow_from_dataframe method.
x_col: string, column in the dataframe that contains the filenames of the target images.
y_col: string or list of strings, columns in the dataframe that will be the target data.
add a comment |
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
up vote
0
down vote
As per the documentation here, you need to specify x_col and y_col as arguments in the flow_from_dataframe method. The defaults for x_col and y_col are 'filename' and 'class', respectively. From the error, I'm guessing that you don't have a column named "filename" in your DataFrame, which is what causes the KeyError. To fix this, specify the following two arguments in the flow_from_dataframe method.
x_col: string, column in the dataframe that contains the filenames of the target images.
y_col: string or list of strings, columns in the dataframe that will be the target data.
add a comment |
up vote
0
down vote
As per the documentation here, you need to specify x_col and y_col as arguments in the flow_from_dataframe method. The defaults for x_col and y_col are 'filename' and 'class', respectively. From the error, I'm guessing that you don't have a column named "filename" in your DataFrame, which is what causes the KeyError. To fix this, specify the following two arguments in the flow_from_dataframe method.
x_col: string, column in the dataframe that contains the filenames of the target images.
y_col: string or list of strings, columns in the dataframe that will be the target data.
add a comment |
up vote
0
down vote
up vote
0
down vote
As per the documentation here, you need to specify x_col and y_col as arguments in the flow_from_dataframe method. The defaults for x_col and y_col are 'filename' and 'class', respectively. From the error, I'm guessing that you don't have a column named "filename" in your DataFrame, which is what causes the KeyError. To fix this, specify the following two arguments in the flow_from_dataframe method.
x_col: string, column in the dataframe that contains the filenames of the target images.
y_col: string or list of strings, columns in the dataframe that will be the target data.
As per the documentation here, you need to specify x_col and y_col as arguments in the flow_from_dataframe method. The defaults for x_col and y_col are 'filename' and 'class', respectively. From the error, I'm guessing that you don't have a column named "filename" in your DataFrame, which is what causes the KeyError. To fix this, specify the following two arguments in the flow_from_dataframe method.
x_col: string, column in the dataframe that contains the filenames of the target images.
y_col: string or list of strings, columns in the dataframe that will be the target data.
answered Nov 21 at 18:54
UserAnon
114
114
add a comment |
add a comment |
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f52826906%2fkeras-imagedatagenerator-flow-from-dataframe-returning-keyerror%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown