
I have a script that processes several geodatabases and, at the end, creates some shapefiles. During this process, an intermediary (local) GDB is created and some selected datasets/feature classes are exported. Then a bunch more processing is done, including deleting some unnecessary fields or creating new ones. Sometimes the script runs perfectly fine, and other times it just stops with an exception. The 'where and when' seems to be totally random. The different exceptions I've got are:

1) ERROR 000210: Cannot create output C:\some_path\temp.gdb\dataset_name Failed to execute (FeatureClassToFeatureClass)

2) ExecuteError: ERROR 000229: Cannot open C:\some_path\temp.gdb\dataset_name Failed to execute (DeleteField)

These errors occur at different points in the script, with exactly the same input GDB and data, and it seems to be totally random when and where they occur.

I'm 99% sure that the names of the feature classes, datasets and paths are not the cause of these random errors, because they only occur sometimes, and most of the time on different feature classes, with the sources being exactly the same each time. While debugging I got errors 5 times in a row, but I have also run the script 20 consecutive times without any error. Normally, though, the errors happen approximately 1 out of 3 times.

I'm including some code (modified to keep only what's necessary) and a lot of comments.

This is the starting point of the script:

# 'tools.py' contains some common methods that we use in our scripts
import os
from collections import namedtuple
from optparse import OptionParser

import arcpy
from arcpy import env

import tools as tls
from tools import CustomException
# ... etc.

''' An Item tuple contains:
 - dataset:
     name of the dataset on the input sources,
 - fclass:
     fclass item (a NameAlias tuple to be used on the target)
 - fields:
     a list of field items for the "keep fields" (fields NOT on this list
     will be removed). Each field item is also a NameAlias tuple.
Example:
Item(dataset='region1', fclass=NameAlias(name='Buildings', alias=None), fields=[NameAlias(name='BdgNumber', alias='Nr'), ...])
'''
Item = namedtuple('Item', 'dataset fclass fields')


def main(options=None, args=None):
    prog = 'export.py'
    usage = "Usage: %prog gdb_txt fc_list_txt out_dir"
    # gdb_txt: text file with the path to an SDE or file GDB per line
    # fc_list_txt: text file containing the datasets and feature classes
    #              we want to process.
    # out_dir: the output directory
    parser = OptionParser(usage)
    if not options and not args:
        (options, args) = parser.parse_args()
    if len(args) != 3:
        parser.error("Incorrect number of arguments")
    gdb_txt = args[0]
    gdb_lst = tls.parse_gdb_lst(gdb_txt)
    # get and check the layers file
    fc_list_txt = args[1]
    err_msg = tls.check_file_exists(fc_list_txt)
    if err_msg:
        parser.error(err_msg)
    # ... parse the file and get the list of items to export and a set of
    # the datasets (to group items by dataset if we want to)
    items, ds_set = init_data(fc_list_txt)
    # create the output dir (if it doesn't exist yet)
    out_dir = args[2]
    out_dir = tls.create_dir(out_dir)
    # Do the work!
    for in_gdb in gdb_lst:
        process_gdb(in_gdb, out_dir, ds_set, items)

Here is where each gdb gets processed in a loop:

def process_gdb(in_gdb, out_dir, ds_set, items):
    # Get the fully-qualified prefix from the in_gdb (this would matter
    # on e.g. an SDE GDB; on a file GDB there is no prefix)
    pfx = tls.get_full_fclass_name_pfx(in_gdb)
    out_gdb = tls.create_gdb_in_dir(in_gdb, out_dir)
    try:
        # Export our feature classes to our temp gdb
        export_feature_classes(in_gdb, out_gdb, ds_set, items, pfx)
    except CustomException as err:
        # CustomException is being used to catch exceptions at specific
        # points and try to continue with the next GDB.
        return
    # Check and repair geometry on exported fclasses... uses:
    # arcpy.CheckGeometry_management
    # arcpy.RepairGeometry_management
    try:
        check_and_repair_geometries(out_gdb, out_dir)
    except CustomException as err:
        # do something
        return
    # Here we check if everything on items got exported and update items
    # accordingly
    env.workspace = out_gdb
    exported = arcpy.ListFeatureClasses()
    items = get_exported_list(exported, items)
    try:
        remove_non_keep_fields(in_gdb, out_gdb, items)
    except CustomException as err:
        # do something
        return
    # ... Some more steps in the processing which are not giving trouble ...
    tls.print_msg('\n>>> ready! <<<')

Here is the export part:

def export_feature_classes(in_gdb, out_gdb, ds_set, export_lst, pfx):
    env.workspace = in_gdb
    datasets = arcpy.ListDatasets()
    for ds in ds_set:
        ds_full = tls.prepend_prefix(pfx, ds)
        # Make sure this dataset exists
        if ds_full not in datasets:
            tls.print_msg("... !! dataset '%s' not found, skipping... " % ds_full)
            continue
        items = [it for it in export_lst if it.dataset == ds]
        ds_workspace = os.path.join(in_gdb, ds_full)
        export_feature_classes_in_ds(in_gdb, out_gdb, ds_workspace, items, pfx)


def export_feature_classes_in_ds(in_gdb, out_gdb, workspace, items, pfx):
    # Set the workspace to the dataset
    env.workspace = workspace
    # Get a list of existing feature classes to make sure the fc we're
    # trying to export exists in the first place
    fc_existing = arcpy.ListFeatureClasses()
    for it in items:
        name = it.fclass.name
        # this returns the fully qualified name for the feature class
        fc_full = tls.prepend_prefix(pfx, name)
        if fc_full not in fc_existing:
            # feature class does not exist in the dataset, so skip it
            continue
        fc_path = os.path.join(workspace, fc_full)
        # first check that the fclass is not empty
        cnt = int(arcpy.GetCount_management(fc_path).getOutput(0))
        if not cnt:
            # feature class is empty, so skip it
            continue
        # Here is where Error 1) occurs:
        # ERROR 000210: Cannot create output C:\some_path\temp.gdb\dataset_name
        # Failed to execute (FeatureClassToFeatureClass)
        try:
            arcpy.FeatureClassToFeatureClass_conversion(fc_path, out_gdb, name)
        except Exception as e:
            # do something: build 'msg' from the error, show messages, etc.
            raise CustomException(msg)

And here is where the non-required fields are being removed from each feature class:

def remove_non_keep_fields(in_gdb, out_gdb, items):
    # get the names of the feature classes to process
    for it in items:
        fc = it.fclass.name
        keep_fields = [f.name for f in it.fields]
        try:
            rm_fields_from_list(out_gdb, fc, keep_fields)
        except Exception as error:
            # Try something desperate ... show some nice messages...
            # etc.
            pass


def rm_fields_from_list(workspace, fclass, lst):
    # set env to the out_gdb
    arcpy.env.workspace = workspace
    arcpy.env.overwriteOutput = True
    # only remove non-required fields, so check lst first and exclude
    # required ones
    to_delete = function_that_returns_a_list_of_names(lst)
    # Here is where Error 2) occurs:
    # ExecuteError: ERROR 000229: Cannot open C:\some_path\temp.gdb\dataset_name
    # Failed to execute (DeleteField)
    arcpy.DeleteField_management(fclass, to_delete)

I'm assuming that the errors are caused by a lock that sometimes gets released and sometimes doesn't. But why? How can I avoid it, or at least work around it?

I already wrote a function that retries an operation 3 or more times when an exception occurs, before finally giving up, but it didn't help.
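The retry helper isn't shown above; what I'm using is essentially a small wrapper like this (the number of tries and the wait time are arbitrary values I picked):

import time

def retry_operation(operation, args, tries=3, wait=5):
    # Call operation(*args), retrying a few times when a geoprocessing
    # error is raised, before letting the last exception propagate.
    for attempt in range(1, tries):
        try:
            return operation(*args)
        except arcpy.ExecuteError:
            tls.print_msg('... attempt %d of %d failed, retrying' % (attempt, tries))
            time.sleep(wait)
    # final attempt: if it fails again, the exception reaches the caller
    return operation(*args)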

UPDATE

I also tried removing any leftover locks, as proposed in this answer, with an arcpy.Exists followed by an arcpy.Compact_management, by calling the clearWSLocks function 'StacyR' posted in his comment. This clearWSLocks is called before each retry in the function I mentioned above, and every time it returns the message "Workspace clear to continue" ... but the delete in rm_fields_from_list keeps raising the same exception *sigh*
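For reference, the clearWSLocks helper I'm calling is essentially the following (reproduced from memory, so the details may differ slightly from StacyR's original):

def clearWSLocks(inputWS):
    # Try to release any stale locks on a workspace by compacting it;
    # Compact_management only does something useful on a local file GDB.
    if all([arcpy.Exists(inputWS),
            arcpy.Compact_management(inputWS),
            arcpy.Exists(inputWS)]):
        return 'Workspace (%s) clear to continue...' % inputWS
    else:
        return '!!!!!!!! ERROR WITH WORKSPACE %s !!!!!!!!' % inputWS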

UPDATE 2

Q: Most commonly locks are left by insert/update cursors, are you using these? If so, can you expand on how you're managing them?

A: In this script I'm not using any insert/update cursors. But normally I would put them in a with arcpy.da.<Insert/Update>Cursor(...) as csr: block to avoid leaving any locks.
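For illustration, this is the pattern I mean (the feature class path and field name here are just placeholders):

# The cursor's lock is released as soon as the with-block exits.
with arcpy.da.UpdateCursor(fc_path, ['SomeField']) as csr:
    for row in csr:
        row[0] = row[0]  # ... whatever change is needed
        csr.updateRow(row)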

Q: If feature class to feature class is giving you problems, try CopyFeatures_management (resources.arcgis.com/en/help/main/10.2/index.html#//…) instead.

A: OK, I will try that. But actually most of the random errors happen at #2, when trying to delete the fields that we don't want to keep. And as I said, there is absolutely no pattern for when and why this is happening. Not the number of fields, nor whether the feature class has many or few features inside... absolutely no pattern that I can recognize.
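If I do try that suggestion, the change in export_feature_classes_in_ds would look roughly like this (CopyFeatures_management takes the full output path instead of a workspace plus a name):

out_path = os.path.join(out_gdb, name)
# copy the feature class to the temp GDB instead of calling
# FeatureClassToFeatureClass_conversion(fc_path, out_gdb, name)
arcpy.CopyFeatures_management(fc_path, out_path)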

UPDATE 3

OK, I think I've finally got a workaround which will probably work. I'm tracking the progress of all items, and if at some stage an exception is caught for an item, I save the failed state to that Item and continue with the others. When I'm done with process_gdb, I try again to process all failed items. A maximum of 3 tries for each item should avoid ending up in an endless loop. That should probably work, but finding the cause of the random errors and avoiding them in the first place would still be really nice.

I'll definitely keep trying to find the cause and post any more hints (if any) here.

asked Feb 1, 2017 at 19:16
  • What is your prefix 'pfx' and some of the error feature class names? Some names are bad in a database... are you creating a new geodatabase for each iteration or does the database already exist? I can't follow tls and items, are they custom objects? Most commonly locks are left by insert/update cursors, are you using these? if so can you expand on how you're managing them. If feature class to feature class is giving you problems try CopyFeatures_management resources.arcgis.com/en/help/main/10.2/index.html#//… instead. Commented Feb 6, 2017 at 23:03
  • Are you passing the feature class and fields as strings or the results from other Arcpy methods/GP tools (which could be other objects)? Commented Feb 7, 2017 at 12:07
  • Can you include in your code sample your variable declares for in_gdb, out_gdb, workspace, items, and pfx? Commented Feb 7, 2017 at 13:41
  • @artwork21 I added the main function and the function that calls rm_fields_from_list Commented Feb 7, 2017 at 15:55
  • I think that this question would be far more likely to attract answers if you were to overhaul it into a much more readable state. When responding to requests for clarifications I think it is far better to revise an existing paragraph than to tack on an internal Q&A. Commented Feb 9, 2017 at 5:54

1 Answer

After a lot of experiments, I ended up with a working concept, which is only a workaround and not really a fix for the underlying issue, but at least it makes it possible to run the script on a bunch of SDEs and finish the processing despite these random errors occurring at different points.

The idea is to expand each Item like this:

Item = namedtuple('Item', 'dataset fclass fields progress')
# Example: progress = {'stages': {}, 'failed_run': False}
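The progress module itself is not shown here; a minimal sketch of the two helpers used below, update_status and get_items_to_go_on, could look something like this (the real bookkeeping I use has a bit more to it):

def update_status(progress, stage, success, error_msg=None):
    # Record the outcome of one stage in an item's progress dict
    entry = progress['stages'].setdefault(stage, {'fails': 0, 'errors': []})
    entry['done'] = success
    if not success:
        entry['fails'] += 1
        entry['errors'].append(error_msg)
    # 'failed_run' flags whether the item failed during the current run
    progress['failed_run'] = not success

def get_items_to_go_on(items, progress_property='progress'):
    # Split the items into those that are fine so far and those that
    # failed during this run.
    go_on = [it for it in items
             if not getattr(it, progress_property)['failed_run']]
    exclude = [it for it in items
               if getattr(it, progress_property)['failed_run']]
    return go_on, exclude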

And then in main, instead of calling process_gdb directly, I pass it to try_items_operation, which first tries to run all items through process_gdb; if some items fail at any stage, the operation is retried only for those items. If any item fails more than 3 times in the same stage, a TooManyFailsException is raised:

import progress as prgs
# progress is a module that handles the progress attribute for
# each `Item`, including: updating progress, getting progress for
# a stage, etc.

    fn = process_gdb
    kwargs = {'in_gdb': in_gdb, 'out_dir': out_dir}
    try:
        # 'items' is the name of the parameter that 'process_gdb'
        # expects for receiving the items list; it is the key used
        # in kwargs.
        # 'progress' is the name of the progress property on an
        # 'Item' tuple.
        prgs.try_items_operation(fn, items, 'progress', 'items', kwargs)
    except TooManyFailsException as err:
        tls.print_msg(str(err), error=True)
        tls.print_msg("... won't process this GDB further.")

In process_gdb I check after each step whether one or more items failed for that run, and exclude them from further steps:

    stage = 'remove_non_keep_fields'
    remove_non_keep_fields(in_gdb, out_gdb, items, stage)
    go_on, exclude = prgs.get_items_to_go_on(items, progress_property='progress')
    # `go_on` and `exclude` contain the lists of items which should be
    # further processed/excluded for this run.
    if exclude:
        # If we're excluding items that failed on this run, show a message
        tls.print_msg(prgs.get_msg_further_process_items(go_on, exclude))

Then at each stage causing trouble, I catch the exceptions and update the item's progress for that stage:

def remove_non_keep_fields(in_gdb, out_gdb, items, stage):
    # get the names of the feature classes to process
    for it in items:
        fc = it.fclass.name
        keep_fields = [f.name for f in it.fields]
        try:
            rm_fields_from_list(out_gdb, fc, keep_fields)
            # If it succeeded, mark it on this item's progress
            progress = it.progress
            prgs.update_status(progress, stage, True)
        except Exception as error:
            # Mark the failed status and continue with the next item
            progress = it.progress
            prgs.update_status(progress, stage, False, str(error))
            print('!!! failed "{}" for {}.\n'
                  'Error: {}'.format(stage, fc, str(error)))

So now things just run, and because the random errors almost never occur at the same place for the same item, the script finishes processing all items after about 2 retries, which is quite acceptable.

answered Feb 11, 2017 at 12:23
