Thanks, Dan and Xander.
At last I solved the problem!
My understanding of what solved the problem:
- Use separate scratch and output work-space
- Using multiprocessing is imperative if there are many rasters to process (I am obliged to Dan for this concept) — it is really helpful, though slow in comparison to other multiprocessing tasks; Esri engineers can tell more about it.
- Be careful about locks.
- Always clean the garbage in each work-space.
- If given multiple rasters to process, arcpy.gp.Times_sa can handle more than arcpy.sa.Times can. It is really absurd that the deprecated tool is more powerful than the new one (I call it a nascent babe, or at least an apple of Sodom).
My understanding about arcpy in raster processing at least in their grid format:
- Arcpy cannot handle a task that involves more than 3000 rasters to process at a stretch.
Below is my script so far:
# --- Imports, environment settings and configuration --------------------------
import arcpy,os,shutil,multiprocessing,re
from arcpy.sa import Con
from arcpy.sa import Raster

# Allow geoprocessing tools to overwrite existing outputs, and check out the
# Spatial Analyst extension needed by Con()/Raster().
arcpy.env.overwriteOutput = True
arcpy.CheckOutExtension("spatial")

try:
    from openpyxl import load_workbook
except ImportError:
    # Only a failed import should trigger this message; a bare "except:"
    # would also swallow KeyboardInterrupt/SystemExit.
    raise Exception("install the openpyxl module in your python system")

# Input/output locations -- adjust to your environment.
INPUT_TEMP_EXCEL_PATH = r"C:\test\Temprt1.xlsx"
INPUT_DEM_RASTER_PATH = r"C:\test\dem_clip_11"
OUTPUT_TEMP_RASTER_FOLDER = r"C:\test\myout1"
TEMP_FOLDER_PATH = r"C:\test\mytemp"
# --- Load temperature records from the first worksheet -------------------------
# Rows with more than 3 cells are kept; None cells are skipped, zeros kept
# as 0.0, floats rounded to 6 decimal places, everything else kept verbatim.
Temp_Data = []
temp_wb = load_workbook(filename=INPUT_TEMP_EXCEL_PATH, read_only=True)
temp_ws = temp_wb[temp_wb.sheetnames[0]]
for row in temp_ws.rows:
    d = []
    if len(row) > 3:
        for cell in row:
            if cell.value is None:  # identity check: None is a singleton
                pass
            elif cell.value == 0:
                d.append(0.000000)
            elif isinstance(cell.value, float):
                d.append(round(cell.value, 6))
            else:
                d.append(cell.value)
        Temp_Data.append(d)

# Drop the header row, then de-duplicate on the first column while keeping
# the first occurrence in order (set.add returns None, so "not seen.add(x)"
# is always True and only serves to record the key).
Temp_Data = Temp_Data[1:]
seen = set()
Temp_Data = [x for x in Temp_Data if x[0] not in seen and not seen.add(x[0])]
def folder_content_deleter(folder_path):
    """Best-effort removal of everything inside *folder_path*.

    Sub-directories are removed recursively and plain files unlinked; any
    entry that cannot be deleted is silently skipped. The folder itself is
    left in place.
    """
    for entry in os.listdir(folder_path):
        entry_path = os.path.join(folder_path, entry)
        try:
            if os.path.isdir(entry_path):
                shutil.rmtree(entry_path)
            elif os.path.isfile(entry_path):
                os.unlink(entry_path)
        except Exception:
            pass  # best-effort: ignore entries that refuse to go away
def purge(dirpth, pattern):
    """Recursively delete every directory entry of *dirpth* matching *pattern*.

    Matching uses re.search, i.e. the pattern may occur anywhere in the
    entry name. Because shutil.rmtree is called with ignore_errors=True,
    matching plain files are left untouched and deletion failures are
    swallowed silently.
    """
    matcher = re.compile(pattern)
    for name in os.listdir(dirpth):
        if matcher.search(name):
            shutil.rmtree(os.path.join(dirpth, name), ignore_errors=True)
def gdb_content_deleter(wrkspc):
    """Delete every feature class found under workspace *wrkspc*.

    Used between raster runs to empty the per-process scratch file GDB.
    NOTE(review): the Walk filter is datatype="FeatureClass" -- confirm this
    actually catches the intermediate datasets left in the scratch GDB, or
    widen the filter.
    """
    for r,d,fls in arcpy.da.Walk(wrkspc, datatype="FeatureClass"):
        for f in fls:
            print f  # Python 2 print statement: log what is being removed
            try:
                arcpy.Delete_management(os.path.join(r,f))
            except:
                # best-effort: locked or missing datasets are simply skipped
                pass
def grouperByYear(input_folder_path, output_folder_path):
    """Copy GRID rasters named g<YYYY><MMDD> into per-year sub-folders.

    Walks *input_folder_path* for GRID raster datasets, extracts the 4-digit
    year that follows the leading 'g' in each name, creates (once) a folder
    named after that year under *output_folder_path*, and copies the raster
    there as g<MMDD>.

    Raises IndexError if a raster name does not match the g<YYYYMMDD>
    pattern.
    """
    # NOTE: the original patterns used r'(?<=\g)...', relying on Python 2
    # treating the unknown escape '\g' as a literal 'g'. Python 3.7+ raises
    # re.error "bad escape \g", so the lookbehinds are spelled with a plain
    # 'g' here -- identical behavior on Python 2.
    for dirpath, dirnames, filenames in arcpy.da.Walk(
            input_folder_path, topdown=True,
            datatype="RasterDataset", type="GRID"):
        for filename in filenames:
            out_folder_name = re.findall(r'(?<=g)\d{4}', filename)[0]
            out_folder_path = os.path.join(output_folder_path, out_folder_name)
            if not os.path.exists(out_folder_path):
                print("Creating and populating folder for year %s ......" % out_folder_name)
                os.mkdir(out_folder_path)
            in_data = os.path.join(dirpath, filename)
            ou_feature_name = 'g' + re.findall(r'(?<=g\d{4})\d{4}$', filename)[0]
            out_data = os.path.join(out_folder_path, ou_feature_name)
            arcpy.Copy_management(in_data, out_data)
def times_worker(times_range_list):
    """Multiprocessing worker: reclassify the DEM once per record in the chunk.

    Each record in *times_range_list* looks like (id, T1, T2, T3). The worker
    creates its own scratch file GDB and its own output folder (both keyed by
    the first record's id), so concurrent workers never share a workspace --
    this avoids the arcpy locking problems described above.

    For every record the DEM is split into three elevation bands:
    < 2573 -> T1, 2573..2754 -> T2, > 2754 -> T3 (units of the T values are
    whatever the spreadsheet holds -- presumably temperatures; confirm).
    """
    # Per-chunk scratch workspace, named after the first record's id.
    scratch_db_name = "Scratch_"+str(times_range_list[0][0])
    arcpy.CreateFileGDB_management(out_folder_path=TEMP_FOLDER_PATH, out_name=scratch_db_name, out_version="CURRENT")
    scr_db = os.path.join(TEMP_FOLDER_PATH,scratch_db_name+".gdb")
    arcpy.env.scratchWorkspace = scr_db
    # Per-chunk output folder under OUTPUT_TEMP_RASTER_FOLDER.
    out_db_name = "RData_"+str(times_range_list[0][0])
    out_db = os.path.join(OUTPUT_TEMP_RASTER_FOLDER,out_db_name)
    if not os.path.exists(out_db):os.mkdir(out_db)
    arcpy.env.workspace = out_db
    for tdata in times_range_list:
        # Truncate the thresholds to 6 decimal places, matching the
        # rounding applied when the spreadsheet was read.
        T1 = float('%.6f'%tdata[1])
        T2 = float('%.6f'%tdata[2])
        T3 = float('%.6f'%tdata[3])
        out_path = os.path.join(out_db,'g'+str(tdata[0]))
        # Re-created each iteration under the same in_memory name; relies on
        # arcpy.env.overwriteOutput = True set at module level.
        outRast_name = "in_memory\\%s"%out_db_name
        arcpy.MakeRasterLayer_management(INPUT_DEM_RASTER_PATH,outRast_name)
        # Nested Con: elevation < 2573 -> T1, <= 2754 -> T2, else T3.
        output_second = Con(Raster(outRast_name)<2573,T1,Con(Raster(outRast_name)<=2754,T2,T3))
        final_temp_raster = output_second
        final_temp_raster.save(out_path)
    # "Always clean the garbage in each work-space" -- empty the scratch GDB.
    gdb_content_deleter(scr_db)
def main(cu, worker, d_range):
    """Run *worker* over *d_range* on a pool of *cu* processes.

    Each element of *d_range* becomes one pool task (chunksize=1); the call
    blocks until every task has completed. Map results are discarded -- the
    workers act purely through side effects.
    """
    worker_pool = multiprocessing.Pool(processes=cu)
    worker_pool.map(worker, d_range, chunksize=1)
    worker_pool.close()
    worker_pool.join()
if __name__ == '__main__':
    core_usage = 5      # maximum number of concurrent worker processes
    chunk_size = 1000   # records handed to each worker task
    # Number of worker tasks needed to cover Temp_Data (Python 2 integer
    # division), then spread over rounds of at most core_usage processes:
    # e.g. 12 tasks with core_usage 5 -> rounds of [5, 5, 2].
    needed_cpu = int(round((len(Temp_Data)/chunk_size),0)+1)
    offsetter = list(divmod(needed_cpu, core_usage))
    cpu_distribution = [core_usage]*offsetter[0]+[offsetter[1]]
    cpu_distribution = [cp for cp in cpu_distribution if cp!=0]
    # Slice the records into chunk_size-sized chunks, one chunk per task.
    temp_data_range = [Temp_Data[i:i+chunk_size] for i in range(0,len(Temp_Data),chunk_size)]
    # NOTE(review): inside a raw string the trailing backslash and the
    # newline are both kept, so they appear verbatim in the printed message
    # -- probably an unintended line-continuation attempt.
    print r"Doing raster math. It may take upto 3-7 hours even and may use your cpu at the heighest.\
So stop using your cpu fo this time. Go and enjoy elsewhere, let me do the job for you!.........."
    loopcnt = 0
    # Run the rounds: each round processes `cpu` chunks in parallel via the
    # pool wrapper, then advances the window into temp_data_range.
    for cpu in cpu_distribution:
        temp_data_range_splitted = temp_data_range[loopcnt:loopcnt+cpu]
        if len(temp_data_range_splitted)>0:
            main(cpu, times_worker, temp_data_range_splitted)
        loopcnt+=cpu
    # Clean up: drop the in_memory workspace and empty the scratch folder.
    if arcpy.Exists("in_memory"):
        arcpy.Delete_management("in_memory")
    folder_content_deleter(TEMP_FOLDER_PATH)
    print "\nGrouping raster math output by year for you. It may take 1-2 hours at best.So stay tuned!........\n"
    grouperByYear(OUTPUT_TEMP_RASTER_FOLDER, OUTPUT_TEMP_RASTER_FOLDER)
    print "\nCleaning all unnecessary files........\n"
    # Remove the now-regrouped per-chunk RData_<id> output folders.
    purge(OUTPUT_TEMP_RASTER_FOLDER, r'RData_[0-9]{8}')
    print r"All job finished! Now you are ready for the processing:)"