I try to the process below.
- spatial search tile raster index polygon with feature
- rasterize feature
- combine rasterized feature and MODIS raster and export combine raster attribute table (loop for tiles)
- table operation with pandas and arcpy
This process works well.However, process gradually slow down.
I try this process for 51 times and check time. All input features are same.
The result is below.Each time_1,time_2 time_3 is described below.
time_1:finish process 2(rasterize feature)
time_2:finish process 3(combine and export table)
time_3:finish process 4(table operation)
elapsed_time:finish 1 feature process(save output in gdb file)
INFO:root:land_00244 1/51 START
INFO:root:time_1:2.9405651092529297[sec]
INFO:root:time_2:3.73823618888855[sec]
INFO:root:time_2:4.30132269859314[sec]
INFO:root:time_2:4.817508697509766[sec]
INFO:root:time_2:5.239825248718262[sec]
INFO:root:time_3:9.50986933708191[sec]
INFO:root:elapsed_time:10.557835817337036[sec]
INFO:root:land_00244_49 51/51 START
INFO:root:time_1:14.089014768600464[sec]
INFO:root:time_2:19.874730348587036[sec]
INFO:root:time_2:25.050605058670044[sec]
INFO:root:time_2:30.117022275924683[sec]
INFO:root:time_2:35.16779851913452[sec]
INFO:root:time_3:51.1801700592041[sec]
INFO:root:elapsed_time:53.90104866027832[sec]
Even though same feature, process time increased 5 times!
All processes slow down.Why?
My environment is below.
OS:windows 10 pro
ArcGIS:10.5.1
ArcGIS pro:2.0
But, in windows 7 and ArcGIS(10.4.1 and pro 1.4), this slow down didn't occur.
Thanks in advance!
from __future__ import print_function, unicode_literals, absolute_import
try:
import urllib2
except ImportError:
import urllib.request as urllib2
import pandas as pd
import arcpy
import sys
import os
import socket
import gc
import time
import numpy as np
import logging
arcpy.env.overwriteOutput = True
DATA_DIR = r"C:\Data"
WORK_DIR = r"C:\Work01"
MODIS = DATA_DIR + "\\" + "MODIS_2001.gdb"
INDEX_POLY = DATA_DIR + "\\" + "MODIS_index.gdb"
def proc_zonalStat(feature_lyr, feature_path, list_index):
arcpy.env.workspace = "in_memory"
arcpy.SelectLayerByLocation_management(list_index[0], "INTERSECT", feature_lyr)
raslist_modis = []
cursor = arcpy.da.SearchCursor(list_index[0], ["F_NAME"])
for row in cursor:
raslist_modis.append(MODIS + "\\" + row[0])
del cursor
ras_feature = "in_memory"+ "\\ras_feature"
arcpy.env.outputCoordinateSystem = arcpy.SpatialReference(4326)
arcpy.PolygonToRaster_conversion(feature_lyr, "OBJECTID", ras_feature, cellsize=0.0041666667)
time_1 = time.time() - start_time
logging.info("time_1:{0}".format(time_1) + "[sec]")
count = 0
aggreFlg = True
for inValueRaster in raslist_modis:
try:
tableName = "MODIS_tbl" + str(count).zfill(3)
outCombine = arcpy.sa.Combine([ras_feature, inValueRaster])
arcpy.TableToTable_conversion(outCombine, scratchGdb, tableName)
arcpy.Delete_management(outCombine)
count += 1
time_2 = time.time() - start_time
logging.info("time_2:{0}".format(time_2) + "[sec]")
fieldName = "MODIS_254"
expression = "0"
arcpy.CalculateField_management(feature_lyr, fieldName, expression, "PYTHON_9.3")
arcpy.Delete_management(ras_feature)
tables = arcpy.ListTables("MODIS_tbl*")
list_df_MODIS = []
fields = ['ras_feature', 'MODIS_landCover_', 'COUNT']
for table in tables:
arr = arcpy.da.TableToNumPyArray(table, fields)
df = pd.DataFrame(arr, columns=fields)
list_df_MODIS.append(df)
del arr
gc.collect()
del tables
gc.collect()
df_all = pd.concat(list_df_MODIS, ignore_index=True)
add = pd.DataFrame([[0,0,0],
[0,1,0],
[0,2,0],
[0,3,0],
[0,4,0],
[0,5,0],
[0,6,0],
[0,7,0],
[0,8,0],
[0,9,0],
[0,10,0],
[0,11,0],
[0,12,0],
[0,13,0],
[0,14,0],
[0,15,0],
[0,16,0],
[0,254,0]],
columns=['ras_feature', 'MODIS_landCover_', 'COUNT'])
df_all = df_all.append(add)
del add
grouped = df_all.groupby([df_all['ras_feature'], df_all['MODIS_landCover_']], as_index=False).sum()
arr_MODIS = grouped.pivot_table(index='ras_feature', columns=['MODIS_landCover_'],values=['MODIS_landCover_','COUNT'],fill_value=0)
arr_MODIS = arr_MODIS.reset_index().values
arr_MODIS = arr_MODIS.astype(np.float32)
struct_array = np.core.records.fromarrays(arr_MODIS.transpose(),
np.dtype([('Value', np.uint64),
('MODIS_0', np.float32),
('MODIS_1', np.float32),
('MODIS_2', np.float32),
('MODIS_3', np.float32),
('MODIS_4', np.float32),
('MODIS_5', np.float32),
('MODIS_6', np.float32),
('MODIS_7', np.float32),
('MODIS_8', np.float32),
('MODIS_9', np.float32),
('MODIS_10', np.float32),
('MODIS_11', np.float32),
('MODIS_12', np.float32),
('MODIS_13', np.float32),
('MODIS_14', np.float32),
('MODIS_15', np.float32),
('MODIS_16', np.float32),
('MODIS_254', np.float32)]))
tmp_table = tmp_proc_Gdb + "\\tmp_table"
arcpy.da.NumPyArrayToTable(struct_array, tmp_table)
del struct_array, grouped, arr_MODIS, df_all, list_df_MODIS
gc.collect()
tmp_table_view = tmp_proc_Gdb + "\\tmp_table_view"
arcpy.MakeTableView_management(tmp_table, tmp_table_view, "NOT Value = 0")
arcpy.AddJoin_management(feature_lyr, "OBJECTID", tmp_table_view, "Value")
feature_name = os.path.basename(feature_path)
for i in range(0,17):
fieldName = feature_name + ".MODIS_" + str(i)
expression = "!tmp_table.MODIS_" + str(i) + "!"
arcpy.CalculateField_management(feature_lyr, fieldName, expression, "PYTHON_9.3")
fieldName = feature_name + ".MODIS_254"
expression = "!tmp_table.MODIS_254!"
arcpy.CalculateField_management(feature_lyr, fieldName, expression, "PYTHON_9.3")
arcpy.RemoveJoin_management(feature_lyr)
arcpy.Delete_management(tmp_table_view)
arcpy.Delete_management(tmp_table)
time_3 = time.time() - start_time
logging.info("time_3:{0}".format(time_3) + "[sec]")
arcpy.env.scratchWorkspace = "in_memory"
THREAD = 0
hostName = socket.gethostname()
local_outGdb_name = 'tmp_' + hostName + '_' + str(THREAD).zfill(2) + '.gdb'
scratchGdb = "in_memory"
local_outGdb = WORK_DIR + '\\' + local_outGdb_name
tmp_proc_Gdb = "in_memory"
logfile_name = WORK_DIR + "\\log_test.txt"
logging.basicConfig(filename=logfile_name,level=logging.DEBUG)
logging.info("************** MAIN PROCESS START **************")
arcpy.env.workspace = WORK_DIR
for gdb in arcpy.ListFiles("test.gdb"):
if not arcpy.Exists(WORK_DIR + '\\' + local_outGdb_name):
arcpy.CreateFileGDB_management(WORK_DIR, local_outGdb_name)
if arcpy.Exists(WORK_DIR + '\\Res\\' + gdb):
arcpy.env.workspace = WORK_DIR + '\\Res\\' + gdb
features_end = arcpy.ListFeatureClasses()
arcpy.env.workspace = WORK_DIR + '\\' + gdb
features_tmp = arcpy.ListFeatureClasses()
features = [ feature for feature in features_tmp if feature not in features_end ]
else:
arcpy.env.workspace = WORK_DIR + '\\' + gdb
features = arcpy.ListFeatureClasses()
totalNum = len(features)
counter = 0
index_MODIS_lyr = "in_memory" + "\\" + "idx_MODIS_lyr"
list_index = [index_MODIS_lyr]
arcpy.MakeFeatureLayer_management(INDEX_POLY + "\\MODIS_landCover_2001", index_MODIS_lyr)
for feature in features:
try:
start_time = time.time()
arcpy.env.workspace = WORK_DIR + '\\' + gdb
arcpy.env.parallelProcessingFactor = "100%"
counter += 1
logging.info(feature+" "+str(counter) + '/' + str(totalNum)+" START")
tmp_feature = tmp_proc_Gdb + "\\tmp_feature"
arcpy.CopyFeatures_management(feature, tmp_feature)
feature_lyr = tmp_proc_Gdb + "\\feature_lyr"
arcpy.MakeFeatureLayer_management(tmp_feature, feature_lyr)
rows = arcpy.GetCount_management(feature_lyr)
countFeature = int(rows.getOutput(0))
del rows
arcpy.AddField_management(feature_lyr, "MODIS_0" , "FLOAT")
arcpy.AddField_management(feature_lyr, "MODIS_1" , "FLOAT")
arcpy.AddField_management(feature_lyr, "MODIS_2" , "FLOAT")
arcpy.AddField_management(feature_lyr, "MODIS_3" , "FLOAT")
arcpy.AddField_management(feature_lyr, "MODIS_4" , "FLOAT")
arcpy.AddField_management(feature_lyr, "MODIS_5" , "FLOAT")
arcpy.AddField_management(feature_lyr, "MODIS_6" , "FLOAT")
arcpy.AddField_management(feature_lyr, "MODIS_7" , "FLOAT")
arcpy.AddField_management(feature_lyr, "MODIS_8" , "FLOAT")
arcpy.AddField_management(feature_lyr, "MODIS_9" , "FLOAT")
arcpy.AddField_management(feature_lyr, "MODIS_10" , "FLOAT")
arcpy.AddField_management(feature_lyr, "MODIS_11" , "FLOAT")
arcpy.AddField_management(feature_lyr, "MODIS_12" , "FLOAT")
arcpy.AddField_management(feature_lyr, "MODIS_13" , "FLOAT")
arcpy.AddField_management(feature_lyr, "MODIS_14" , "FLOAT")
arcpy.AddField_management(feature_lyr, "MODIS_15" , "FLOAT")
arcpy.AddField_management(feature_lyr, "MODIS_16" , "FLOAT")
arcpy.AddField_management(feature_lyr, "MODIS_254" , "FLOAT")
proc_zonalStat(feature_lyr, tmp_feature, list_index)
out_feature = local_outGdb + "\\" + feature
arcpy.CopyFeatures_management(tmp_feature, out_feature)
arcpy.Delete_management(feature_lyr)
arcpy.Delete_management(tmp_feature)
elapsed_time = time.time() - start_time
logging.info("elapsed_time:{0}".format(elapsed_time) + "[sec]")
except arcpy.ExecuteError:
logging.info(arcpy.GetMessages(2))
arcpy.Delete_management("in_memory")
arcpy.MakeFeatureLayer_management(INDEX_POLY + "\\MODIS_landCover_2001", index_MODIS_lyr)
outDir = WORK_DIR + "\\Res"
outGdbPath = outDir + "\\" + gdb
if not arcpy.Exists(outGdbPath):
arcpy.CreateFileGDB_management(outDir, gdb)
arcpy.env.workspace = local_outGdb
features = arcpy.ListFeatureClasses()
arcpy.FeatureClassToGeodatabase_conversion(features, outGdbPath)
arcpy.Delete_management(local_outGdb)
logging.info("************** MAIN PROCESS E N D **************")