Good Morning,
I have created a geoprocessing tool and deployed it to an Esri server (Enterprise portal). The script does the following:
- asks the user to enter the URL for the rest endpoint of a feature service
- takes that feature service and runs various processes on it to get the data into a pandas DataFrame
- converts the pandas DataFrame to a CSV file
The tricky part is getting that CSV file back to the user.
Any help/guidance is greatly appreciated.
Here is the code for the script:
import json
import urllib2
import urllib
import os
from datetime import datetime
import time
import pandas as pd
import numpy as np
import arcpy
# Today's date as YYYYMMDD, used to stamp the output csv filenames.
todays_date = datetime.now().strftime("%Y%m%d")
def get_json(url, params):
    """POST *params* to *url* and return the decoded JSON response as a dict."""
    req = urllib2.Request(url, params)
    raw = urllib2.urlopen(req).read()
    return json.loads(raw.decode('utf-8'))
def get_json_with_query(url, query):
    """GET *url* + *query* (already URL-encoded) and return the decoded JSON."""
    response = urllib2.urlopen(urllib2.Request(url + query))
    return json.loads(response.read().decode('utf-8'))
def df_to_csv(df, filename):
    """Write *df* to *filename* as a UTF-8 CSV without the index column."""
    csv_kwargs = dict(sep=',', index=False, encoding='utf-8')
    df.to_csv(filename, **csv_kwargs)
    for message in (' ', 'csv creation complete'):
        arcpy.AddMessage(message)
    return
def convert_floats_to_ints(df):
    """Cast every float64 column of *df* to int (truncating) and return *df*.

    Mutates *df* in place; non-float columns are left untouched.
    """
    for col in df.select_dtypes(['float64']).columns:
        df[col] = df[col].astype(int)
    return df
def get_date_column_names(fs):
    """Return the names of all esriFieldTypeDate fields in feature set *fs*."""
    return [field['name']
            for field in fs['fields']
            if field['type'] == 'esriFieldTypeDate']
def convert_date(col_val):
    """Convert an esri epoch-milliseconds value to 'YYYY-MM-DD HH:MM:SS' (UTC).

    Falsy values (None, 0, '') and values that cannot be converted are
    returned unchanged, so the function is safe to apply over a whole column.
    """
    if not col_val:
        return col_val
    try:
        return time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(col_val / 1000.))
    except Exception:
        # Best-effort: leave unconvertible values as-is.
        return col_val
def fs_to_dataframe(fs):
    """Build a pandas DataFrame from the 'features' of an esri JSON feature set.

    Each feature's 'attributes' dict becomes one row; NaN values are replaced
    with '' so the eventual CSV stays clean.

    Bug fixed: the original wrapped the for-loop in a ``while count <= len(...)``
    that made a second pass over the features, inserting every record twice
    (keys 0..n-1 and again n..2n-1). A trailing ``drop_duplicates`` masked the
    duplication but also silently removed genuinely duplicate records. The
    single pass below preserves every record exactly once.
    """
    data_dict = {i: feature['attributes']
                 for i, feature in enumerate(fs['features'])}
    df = pd.DataFrame.from_dict(data_dict, orient='index')
    df.fillna('', inplace=True)
    return df
def dataframe_to_csv(df, layer_filename):
    """Log the target filename, then delegate the CSV write to df_to_csv."""
    for message in (' ', 'creating ' + layer_filename):
        arcpy.AddMessage(message)
    df_to_csv(df, layer_filename)
    return
def layers_to_csv(cont, fs_url, csv_path):
    """Export every layer listed in service JSON *cont* to its own CSV file."""
    arcpy.AddMessage(' ')
    arcpy.AddMessage('converting layers to csv files')
    for lyr in cont['layers']:
        lyr_url = '{0}/{1}'.format(fs_url, lyr['id'])
        layer_to_csv(get_fs(lyr_url), lyr_url, csv_path)
    return
def tables_to_csv(cont, fs_url, csv_path):
    """Export every table listed in service JSON *cont* to its own CSV file."""
    arcpy.AddMessage(' ')
    arcpy.AddMessage('converting tables to csv files')
    for tbl in cont['tables']:
        tbl_url = '{0}/{1}'.format(fs_url, tbl['id'])
        layer_to_csv(get_fs(tbl_url), tbl_url, csv_path)
    return
def get_fs(fs_url):
    """Fetch and return the pjson description of the service/layer at *fs_url*."""
    return get_json(fs_url, urllib.urlencode({'f': 'pjson'}))
def get_max_record_count(feature_service):
    """Return the server-side page size ('maxRecordCount') of the layer JSON."""
    return feature_service['maxRecordCount']
def get_total_record_count(feature_service_url):
    """Run a returnCountOnly query against the layer, log and return the count."""
    total_records_count_query = 'query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&distance=&units=esriSRUnit_Foot&relationParam=&outFields=&returnGeometry=false&maxAllowableOffset=&geometryPrecision=&outSR=&having=&gdbVersion=&historicMoment=&returnDistinctValues=false&returnIdsOnly=false&returnCountOnly=true&returnExtentOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&multipatchOption=xyFootprint&resultOffset=&resultRecordCount=&returnTrueCurves=false&returnExceededLimitFeatures=false&quantizationParameters=&returnCentroid=false&sqlFormat=none&resultType=&f=pjson'
    cont = get_json_with_query(feature_service_url + '/', total_records_count_query)
    total = cont['count']
    arcpy.AddMessage(' ')
    arcpy.AddMessage('There are ' + str(total) + ' total records for this layer.')
    return total
def layer_to_csv(layer, fs_url, csv_path):
    """Download every record of *layer* and write them to one CSV in *csv_path*.

    layer    -- decoded JSON description of a single layer/table (provides
                'name', 'maxRecordCount' and 'fields').
    fs_url   -- REST URL of that layer (".../FeatureServer/<id>").
    csv_path -- existing folder; the file is named "<name>_<yyyymmdd>.csv".

    If the layer holds fewer records than the service page size, one request
    fetches everything; otherwise records are paged with
    "objectid > a and objectid <= b" windows of maxRecordCount width.
    NOTE(review): the paging assumes a field literally named 'objectid' with
    reasonably contiguous values -- large gaps in objectids could leave
    trailing records unfetched; confirm against the services this tool targets.

    Fixes vs. original: removed dead locals ('objectid' and 'step' were
    incremented but never read), moved the page buffer into the only branch
    that uses it, and consolidated the duplicated completion messages.
    """
    fs_max = get_max_record_count(layer)      # server page size
    fs_ttl = get_total_record_count(fs_url)   # total records in the layer
    layer_filename = layer['name'] + '_' + todays_date + '.csv'
    filename = os.path.join(csv_path, layer_filename)
    colNames = get_date_column_names(layer)
    url = fs_url + '/'
    if fs_ttl < fs_max:
        # Everything fits in a single request.
        default_query = 'query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&distance=&units=esriSRUnit_Foot&relationParam=&outFields=*&returnGeometry=true&maxAllowableOffset=&geometryPrecision=&outSR=&having=&gdbVersion=&historicMoment=&returnDistinctValues=false&returnIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&multipatchOption=xyFootprint&resultOffset=&resultRecordCount=&returnTrueCurves=false&returnExceededLimitFeatures=false&quantizationParameters=&returnCentroid=false&sqlFormat=none&resultType=&f=pjson'
        arcpy.AddMessage(' ')
        arcpy.AddMessage(str(fs_ttl) + ' records to download...')
        arcpy.AddMessage(' ')
        fs = get_json_with_query(url, default_query)
        df = fs_to_dataframe(fs)
        for col in colNames:
            # Date fields arrive as epoch milliseconds; make them readable.
            if df[col].dtype == np.int64:
                df[col] = df[col].apply(convert_date)
        dataframe_to_csv(df, filename)
    else:
        # Page through the layer in maxRecordCount-sized objectid windows.
        dataframe = []
        objid1 = 0
        objid2 = fs_max
        while fs_ttl > 0:
            arcpy.AddMessage(' ')
            arcpy.AddMessage(str(fs_ttl) + ' records to download...')
            arcpy.AddMessage(' ')
            objectid_query = 'query?where=objectid+>+' + str(objid1) + '+and+objectid+<%3D+' + str(
                objid2) + '&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&distance=&units=esriSRUnit_Foot&relationParam=&outFields=*&returnGeometry=true&maxAllowableOffset=&geometryPrecision=&outSR=&having=&gdbVersion=&historicMoment=&returnDistinctValues=false&returnIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&multipatchOption=xyFootprint&resultOffset=&resultRecordCount=&returnTrueCurves=false&returnExceededLimitFeatures=false&quantizationParameters=&returnCentroid=false&sqlFormat=none&resultType=&f=pjson'
            fs = get_json_with_query(url, objectid_query)
            df = fs_to_dataframe(fs)
            for col in colNames:
                if df[col].dtype == np.int64:
                    df[col] = df[col].apply(convert_date)
            dataframe.append(df)
            objid1 += fs_max
            objid2 += fs_max
            fs_ttl -= fs_max
        df_merged = pd.concat(dataframe)
        df_merged.reset_index(drop=True, inplace=True)
        dataframe_to_csv(df_merged, filename)
    arcpy.AddMessage(' ')
    arcpy.AddMessage('layer to csv process complete')
    return
class Toolbox(object):
    """ArcGIS Python toolbox wrapper (the toolbox name is the .pyt filename)."""

    def __init__(self):
        """Define the toolbox label, alias, and the tools it exposes."""
        self.label = "rest_to_csv"
        self.alias = "resttocsv"
        # Tool classes published by this toolbox.
        self.tools = [RESTtoCSV]
class RESTtoCSV(object):
    """Geoprocessing tool: export each layer/table of a Feature Service to CSV.

    Fix vs. original: removed a large commented-out query constant that was
    dead code inside execute().
    """

    def __init__(self):
        """Define the tool (tool name is the name of the class)."""
        self.label = "RESTtoCSV"
        self.description = "By providing the URL to the REST endpoint of the Feature Service this script will create a csv file for all layers and tables that are within the Feature Service"
        self.canRunInBackground = False

    def getParameterInfo(self):
        """Define parameter definitions: the service URL and an output folder."""
        in_features = arcpy.Parameter(
            displayName="Feature Service URL",
            name="feature_service_url",
            datatype="String",
            parameterType="Required",
            direction="Input")
        save_location = arcpy.Parameter(
            displayName="Save CSV to...",
            name="csv_save_location",
            datatype="DEFolder",
            parameterType="Required",
            direction="Input")
        parameters = [in_features, save_location]
        return parameters

    def isLicensed(self):
        """Set whether tool is licensed to execute."""
        return True

    def updateParameters(self, parameters):
        """Modify the values and properties of parameters before internal
        validation is performed. This method is called whenever a parameter
        has been changed."""
        return

    def updateMessages(self, parameters):
        """Modify the messages created by internal validation for each tool
        parameter. This method is called after internal validation."""
        return

    def execute(self, parameters, messages):
        """Fetch the service JSON and dispatch on its shape.

        A whole-service URL returns JSON with 'layers' and/or 'tables' keys;
        a single-layer URL returns JSON carrying an 'id' key, which is then
        exported directly.
        """
        fs_url = parameters[0].valueAsText
        csv_path = parameters[1].valueAsText
        fs = get_fs(fs_url)
        if 'layers' in fs:
            layers_to_csv(fs, fs_url, csv_path)
        if 'tables' in fs:
            tables_to_csv(fs, fs_url, csv_path)
        if 'id' in fs:
            layer_to_csv(fs, fs_url, csv_path)
        return