# Crop Irrigation Predictor

In [1]:
import pandas as pd
import numpy as np
import boto3
from sklearn.ensemble import RandomForestRegressor
from sagemaker_studio import Project
from urllib.parse import urlparse
import math

proj = Project()
athena = boto3.client('athena')
s3 = boto3.client('s3')
s3_folder = '/athena-results/'

In [2]:
# Query Glue table
def query_glue_table():
    response = athena.start_query_execution(
        QueryString='SELECT timestamp, temp_f, rain_inches, windspeed FROM weather_data',
        QueryExecutionContext={'Database': 'sagemaker_sample_db'},
        ResultConfiguration={'OutputLocation': proj.s3.root + s3_folder}
    )
    
    query_id = response['QueryExecutionId']
    
    while True:
        result = athena.get_query_execution(QueryExecutionId=query_id)
        if result['QueryExecution']['Status']['State'] in ['SUCCEEDED', 'FAILED']:
            break
    
    results = athena.get_query_results(QueryExecutionId=query_id)
    columns = [col['Label'] for col in results['ResultSet']['ResultSetMetadata']['ColumnInfo']]
    rows = [[data.get('VarCharValue', '') for data in row['Data']] for row in results['ResultSet']['Rows'][1:]]
    
    return pd.DataFrame(rows, columns=columns)

df = query_glue_table()
df['temp_f'] = pd.to_numeric(df['temp_f'], errors='coerce')
df['rain_inches'] = pd.to_numeric(df['rain_inches'], errors='coerce')
df['windspeed'] = pd.to_numeric(df['windspeed'], errors='coerce')
print(f'Loaded {len(df)} records')

Loaded 999 records


In [3]:
# Create irrigation need target
df['timestamp'] = pd.to_datetime(df['timestamp'])
df['month'] = df['timestamp'].dt.month

df['irrigation_need'] = (
    (df['temp_f'] - 32) * 0.15 +
    df['windspeed'] * 0.3 +
    np.where(df['rain_inches'] > 0, -df['rain_inches'] * 3, 2) +
    np.where(df['month'].isin([4,5,6,7,8,9]), 3, -1)
).clip(0, 10)

In [4]:
# Train model
df = df.dropna()
X = df[['temp_f', 'rain_inches', 'windspeed']]
y = df['irrigation_need']

model = RandomForestRegressor(n_estimators=50, random_state=42)
model.fit(X, y)
print('Model trained')

Model trained


In [5]:
# Predict irrigation needs
scenarios = [
    ['Hot Summer', 95, 0, 15],
    ['Rainy Day', 65, 2.0, 5],
    ['Cool Winter', 35, 0.1, 3]
]

print('\n=== CROP IRRIGATION PREDICTION RESULTS ===')
print('Irrigation Need Scale: 0-10 (0=No irrigation, 10=Maximum irrigation)\n')

results = []
results.append('=== CROP IRRIGATION PREDICTION RESULTS ===')
results.append('Irrigation Need Scale: 0-10 (0=No irrigation, 10=Maximum irrigation)')
results.append('')

high_need = []
low_need = []

for name, temp, rain, wind in scenarios:
    need = model.predict([[temp, rain, wind]])[0]
    need_rounded = math.ceil(need)
    result_line = f'{name}: {need_rounded}'
    print(result_line)
    results.append(result_line)
    
    if need_rounded >= 5:
        high_need.append(f'{name} ({need_rounded:.1f})')
    else:
        low_need.append(f'{name} ({need_rounded:.1f})')

print('\nHigh Irrigation Need (≥5):', ', '.join(high_need) if high_need else 'None')
print('Low Irrigation Need (<5):', ', '.join(low_need) if low_need else 'None')

results.append('')
results.append('High Irrigation Need (≥5): ' + (', '.join(high_need) if high_need else 'None'))
results.append('Low Irrigation Need (<5): ' + (', '.join(low_need) if low_need else 'None'))

# Save results to S3
data_path = proj.s3.root + '/output/results.txt'
parsed = urlparse(data_path)
bucket = parsed.netloc
key = parsed.path.lstrip('/')

s3.put_object(
    Bucket=bucket,
    Key=key,
    Body='\n'.join(results),
    ContentType='text/plain'
)
print(f'Results saved to {data_path}')


=== CROP IRRIGATION PREDICTION RESULTS ===
Irrigation Need Scale: 0-10 (0=No irrigation, 10=Maximum irrigation)

Hot Summer: 5
Rainy Day: 0
Cool Winter: 2

High Irrigation Need (≥5): Hot Summer (5.0)
Low Irrigation Need (<5): Rainy Day (0.0), Cool Winter (2.0)
Results saved to s3://amazon-sagemaker-851209711676-us-east-2-camtnqrstpq9pl/shared/output/results.txt
