Script created by Chris Snyder; modified by C. Denninger # Script resulted from ESRI Forum posts dated 04FEB2010. # This script is used in a script tool within ArcGIS and calculates #..the median value of one entire field and posts that single median #...value to every row in one other entire field. import sys, arcgisscripting gp = arcgisscripting.create(9.3) inputTable = gp.GetParameterAsText(0) readField = gp.GetParameterAsText(1) #this needs to be a numeric field writeField = gp.GetParameterAsText(2) #this needs to be a double or float valueList = [] searchRows = gp.searchcursor(inputTable) searchRow = searchRows.next() while searchRow: searchRowValue = searchRow.getvalue(readField) if searchRowValue == None: pass #don't add a null value to the list! else: valueList.append(searchRowValue) searchRow = searchRows.next() del searchRow del searchRows valueList.sort() listLength = len(valueList) if listLength == 0: print "Every value was null! Exiting script..."; sys.exit() elif listLength % 2 == 0: #even, so get the mean of the 2 center values medianValue = (valueList[listLength / 2] + valueList[listLength / 2 - 1]) / 2.0 else: #odd, so it's easy! medianValue = valueList[listLength / 2] updateRows = gp.updatecursor(inputTable) updateRow = updateRows.next() while updateRow: updateRow.setvalue(writeField, medianValue) updateRows.UpdateRow(updateRow) updateRow = updateRows.next() del updateRow del updateRows
import arcpy, sys, os inputTable = arcpy.GetParameterAsText(0) readField = arcpy.GetParameterAsText(1) writeField = arcpy.GetParameterAsText(2) valueList = [] searchRows = arcpy.da.SearchCursor(inputTable) searchRow = searchRows.next() while searchRow: searchRowValue = searchRow.getValue(readField) if searchRowValue == None: pass #don't add a null value to the list! else: valueList.append(searchRowValue) searchRow = searchRows.next() del seachRow del searchRows valueList.sort listLength = len(valueList) if listLength == 0: print "Every value was null! Exiting script..."; sys.exit() elif listLength % 2 == 0: #even, so get the mean of the 2 center values medianValue = (valueList[listLength / 2] + valueList[listLength / 2 - 1]) / 2.0 else: medianValue = valueList[listLength / 2] updateRows = arcpy.da.UpdateCursor(inputTable) updateRow = updateRows.next() while updateRow: updateRow.setvalue(writeField, medianValue) updateRows.UpdateRow(updateRow) updateRow = updateRows.next() del updateRow del updateRows
Solved! Go to Solution.
valueList.sort listLength = len(valueList) if listLength == 0: print "Every value was null! Exiting script..."; sys.exit() elif listLength % 2 == 0: #even, so get the mean of the 2 center values medianValue = (valueList[listLength / 2] + valueList[listLength / 2 - 1]) / 2.0 else: medianValue = valueList[listLength / 2]
medianValue = numpy.median(valueList)
if numpy.isnan(medianValue): print "Every value was null! Exiting script..." sys.exit()
with arcpy.da.SearchCursor(inputTable, [readField]) as searchRows: for searchRow in searchRows: if searchRow[0] != None: valueList.append(searchRow[0]) del searchRow del searchRows
with arcpy.da.UpdateCursor(inputTable, [writeField]) as updateRows: for row in updateRows: row[0] = medianValue updateRows.updateRow(row) del row del updateRows
''' Written By Caleb Mackey 4/17/2013 Calculates Median Statistics ''' import arcpy, os, sys, traceback # env settings arcpy.env.overwriteOutput = True arcpy.env.qualifiedFieldNames = False def GetMedian(in_list): sorted_list = sorted(in_list) median = int(round(len(sorted_list) / 2)) if len(sorted_list)%2==0: med_val = float(sorted_list[median-1] + sorted_list[median]) / 2 else: med_val = sorted_list[median] return med_val def GetMedianValues(source_fc, new_table, case_field, value_field): ''' Generates a table with Median Values, summarized by case_field. If the goal is to get the median for the entire table, use a case field that has the same value for all records. source_fc - input feature class to compute median statistics for new_table - output table case_field - similar to dissolve field, computes stats based on unique values in this field value_field - field that contains the actual values for statistics; must be numeric ''' # Get unique value list for query print 'starting cursor' with arcpy.da.SearchCursor(source_fc, [case_field]) as rows: un_vals = list(set(r[0] for r in rows)) lyr = arcpy.MakeFeatureLayer_management(source_fc,'source_layer') values = {} # Get Median UseValue for each station name for st in un_vals: query = '"{0}" = \'{1}\''.format(case_field, st) arcpy.SelectLayerByAttribute_management(lyr, 'NEW_SELECTION', query) use_vals = [] with arcpy.da.SearchCursor(lyr, [value_field]) as rows: for row in rows: if row[0] != None: use_vals.append(row[0]) if len(use_vals) > 0: median = GetMedian(use_vals) values[st] = [median, len(use_vals)] # Create new Summary Statistics table with median # if arcpy.Exists(new_table): arcpy.Delete_management(new_table) arcpy.CreateTable_management(os.path.split(new_table)[0],os.path.basename(new_table)) # Get field names and types for field in arcpy.ListFields(source_fc): if field.name in [case_field, value_field]: ftype = field.type name = field.name length = field.length pres = field.precision scale = field.scale if name == value_field: if new_table.endswith('.dbf'): name = 'MED_' + value_field[:6] else: name = 'MED_' + value_field value_field2 = name arcpy.AddField_management(new_table,name,ftype,pres,scale,length) # Add frequency field arcpy.AddField_management(new_table,'FREQUENCY','LONG') # Insert rows with arcpy.da.InsertCursor(new_table, [case_field, value_field2, 'FREQUENCY']) as rows: for k,v in sorted(values.iteritems()): rows.insertRow((k, v[0], v[1])) # report results print 'Created %s' %os.path.basename(new_table) arcpy.AddMessage('Created %s' %os.path.basename(new_table)) # .dbf's are automatically given a 'Field1' field...Clean this up try: if new_table.endswith('.dbf'): arcpy.DeleteField_management(new_table, 'Field1') except: pass print 'Done' if __name__ == '__main__': ## # testing ## source_fc = r'C:\Testing\Test.gdb\CSR_by_TWP' #### new_table = r'C:\Testing\Test.gdb\Median_CSR' #gdb test ## new_table = r'C:\Testing\Median_CSR.dbf' #dbf test ## case_field = 'NAME' ## value_field = 'AVE_CSR' # Script tool params source_fc = arcpy.GetParameterAsText(0) new_table = arcpy.GetParameterAsText(1) case_field = arcpy.GetParameterAsText(2) value_field = arcpy.GetParameterAsText(3) GetMedianValues(source_fc, new_table, case_field, value_field)
Thanks for posting this, very helpful! I have updated it to Python 3.x, which is what ArcGIS Pro ships with:
'''
Written By Caleb Mackey
4/17/2013
Updated to Python 3.x (ArcGIS Pro 1.0) by Katja Krivoruchko
4/16/2015
Calculates Median Statistic
'''
import arcpy, os, sys, traceback
# env settings
arcpy.env.overwriteOutput = True
arcpy.env.qualifiedFieldNames = False
def GetMedian(in_list):
sorted_list = sorted(in_list)
median = int(round(len(sorted_list) / 2))
if len(sorted_list)%2==0:
med_val = float(sorted_list[median-1]
+ sorted_list[median]) / 2
else:
med_val = sorted_list[median]
return med_val
def GetMedianValues(source_fc, new_table, case_field, value_field):
''' Generates a table with Median Values, summarized by case_field. If the
goal is to get the median for the entire table, use a case field that has
the same value for all records.
source_fc - input feature class to compute median statistics for
new_table - output table
case_field - similar to dissolve field, computes stats based on unique values in this field
value_field - field that contains the actual values for statistics; must be numeric
'''
# Get unique value list for query
with arcpy.da.SearchCursor(source_fc, [case_field]) as rows:
un_vals = list(set(r[0] for r in rows))
lyr = arcpy.MakeFeatureLayer_management(source_fc,'source_layer')
values = {}
# Get Median UseValue for each station name
for st in un_vals:
query = '"{0}" = \'{1}\''.format(case_field, st)
arcpy.SelectLayerByAttribute_management(lyr, 'NEW_SELECTION', query)
use_vals = []
with arcpy.da.SearchCursor(lyr, [value_field]) as rows:
for row in rows:
if row[0] != None:
use_vals.append(row[0])
if len(use_vals) > 0:
median = GetMedian(use_vals)
values[st] = [median, len(use_vals)]
# Create new Summary Statistics table with median
#
if arcpy.Exists(new_table):
arcpy.Delete_management(new_table)
arcpy.CreateTable_management(os.path.split(new_table)[0],os.path.basename(new_table))
# Get field names and types
for field in arcpy.ListFields(source_fc):
if field.name in [case_field, value_field]:
ftype = field.type
name = field.name
length = field.length
pres = field.precision
scale = field.scale
if name == value_field:
if new_table.endswith('.dbf'):
name = 'MED_' + value_field[:6]
else:
name = 'MED_' + value_field
value_field2 = name
arcpy.AddField_management(new_table,name,ftype,pres,scale,length)
# Add frequency field
arcpy.AddField_management(new_table,'FREQUENCY','LONG')
# Insert rows
with arcpy.da.InsertCursor(new_table, [case_field, value_field2, 'FREQUENCY']) as rows:
for k,v in sorted(values.items()):
rows.insertRow((k, v[0], v[1]))
# report results
print ('Created %s' %os.path.basename(new_table) )
arcpy.AddMessage('Created %s' %os.path.basename(new_table))
# .dbf's are automatically given a 'Field1' field...Clean this up
try:
if new_table.endswith('.dbf'):
arcpy.DeleteField_management(new_table, 'Field1')
except:
pass
#print 'Done'
if __name__ == '__main__':
# Script tool params
source_fc = arcpy.GetParameterAsText(0)
new_table = arcpy.GetParameterAsText(1)
case_field = arcpy.GetParameterAsText(2)
value_field = arcpy.GetParameterAsText(3)
GetMedianValues(source_fc, new_table, case_field, value_field)
Thank you Caleb...worked like a charm! I naively thought that 'median' was just going to be a simple calculation function in the Field Calculator...thank god for your script!
No problem, glad to see this is still getting used!
valueList.sort listLength = len(valueList) if listLength == 0: print "Every value was null! Exiting script..."; sys.exit() elif listLength % 2 == 0: #even, so get the mean of the 2 center values medianValue = (valueList[listLength / 2] + valueList[listLength / 2 - 1]) / 2.0 else: medianValue = valueList[listLength / 2]
medianValue = numpy.median(valueList)
if numpy.isnan(medianValue): print "Every value was null! Exiting script..." sys.exit()
Once numeric values are obtained from a field, via whatever means, medians are relatively easy to determine using numpy even if the field contains NaN's
>>> import numpy as np >>> a = np.array([1,2,np.NaN,3.,4,np.NaN],dtype='float64') >>> np.median(a) 3.5 >>> np.nanmedian(a) 2.5 >>>
that's why I love numpy and panda