Source code for aipype_g.tasklib.google_sheets_service

"""Google Sheets API Service - Core Sheets operations with OAuth2 authentication."""

import os
from typing import Any, List, Optional, Callable
from google.oauth2.credentials import Credentials

# Google API client library lacks comprehensive type stubs
from googleapiclient.discovery import build  # pyright: ignore[reportUnknownVariableType]
from googleapiclient.errors import HttpError

from aipype.utils.common import setup_logger
from .google_sheets_models import SheetData, SheetRange, SpreadsheetInfo
from .google_auth_service import GoogleAuthService, SHEETS_SCOPES


# Type aliases for Google Sheets API
SheetsServiceType = Any
ProgressCallback = Callable[[str], None]


class GoogleSheetsError(Exception):
    """Custom exception for Google Sheets service errors."""

    pass


[docs] class GoogleSheetsService: """Google Sheets API service with OAuth2 authentication and read operations."""
[docs] def __init__( self, credentials: Optional[Credentials] = None, credentials_file: Optional[str] = None, token_file: Optional[str] = None, timeout: int = 30, ) -> None: """Initialize Google Sheets service. Args: credentials: Pre-authenticated Google credentials (from GoogleOAuthTask) credentials_file: Path to OAuth2 credentials JSON file (fallback) token_file: Path to store/load OAuth2 tokens (fallback) timeout: Request timeout in seconds (default: 30) """ self.logger = setup_logger("google_sheets_service") self.timeout = timeout # Google API client lacks comprehensive type stubs, using Any for service object self.service: Optional[SheetsServiceType] = None self.credentials: Optional[Credentials] = credentials # Store auth params for fallback authentication self.credentials_file = credentials_file or os.getenv( "GOOGLE_CREDENTIALS_FILE", os.getenv("GMAIL_CREDENTIALS_FILE", "google_credentials.json"), ) self.token_file = token_file or os.getenv( "SHEETS_TOKEN_FILE", "sheets_token.json" ) # Try to initialize service try: self._initialize_service() except Exception as e: self.logger.warning( f"Failed to initialize Sheets service: {e}. Service will be initialized on first use." )
def _initialize_service(self) -> None: """Initialize the Google Sheets service.""" if not self.credentials: # Use GoogleAuthService for authentication auth_service = GoogleAuthService( credentials_file=self.credentials_file, token_file=self.token_file, scopes=SHEETS_SCOPES, ) self.credentials = auth_service.authenticate() # Build service # Google API build function returns service object with unknown type self.service = build("sheets", "v4", credentials=self.credentials) self.logger.info("Google Sheets API service initialized successfully") def _ensure_service(self) -> None: """Ensure service is initialized and ready.""" if not self.service: self._initialize_service()
[docs] def get_spreadsheet_info( self, spreadsheet_id: str, progress_callback: Optional[ProgressCallback] = None, ) -> SpreadsheetInfo: """Get information about a spreadsheet. Args: spreadsheet_id: ID of the Google Spreadsheet progress_callback: Optional callback for progress updates Returns: SpreadsheetInfo object with spreadsheet metadata Raises: GoogleSheetsError: If API call fails """ self._ensure_service() if progress_callback: progress_callback(f"Fetching spreadsheet info for {spreadsheet_id[:8]}...") try: # Google API service methods have unknown return types spreadsheet = ( self.service.spreadsheets() # pyright: ignore[reportOptionalMemberAccess] .get(spreadsheetId=spreadsheet_id) .execute() ) title = spreadsheet.get("properties", {}).get("title", "Untitled") sheets = spreadsheet.get("sheets", []) sheet_names = [sheet["properties"]["title"] for sheet in sheets] info = SpreadsheetInfo( spreadsheet_id=spreadsheet_id, title=title, sheet_names=sheet_names, properties=spreadsheet.get("properties", {}), ) if progress_callback: progress_callback( f"Retrieved info for '{title}' with {len(sheet_names)} sheets" ) self.logger.debug( f"Retrieved spreadsheet info: {title} ({len(sheet_names)} sheets)" ) return info except HttpError as e: error_msg = f"Google Sheets API error getting spreadsheet info: {e}" self.logger.error(error_msg) raise GoogleSheetsError(error_msg) from e
[docs] def read_range( self, spreadsheet_id: str, range_a1: str, include_headers: bool = True, progress_callback: Optional[ProgressCallback] = None, ) -> SheetData: """Read data from a specific range in a spreadsheet. Args: spreadsheet_id: ID of the Google Spreadsheet range_a1: Range in A1 notation (e.g., 'Sheet1!A1:C10') include_headers: Whether first row should be treated as headers progress_callback: Optional callback for progress updates Returns: SheetData object with the read data Raises: GoogleSheetsError: If API call fails or range is invalid """ self._ensure_service() if progress_callback: progress_callback(f"Reading range {range_a1} from spreadsheet...") try: # Google API service methods have unknown return types result = ( self.service.spreadsheets() # pyright: ignore[reportOptionalMemberAccess] .values() .get(spreadsheetId=spreadsheet_id, range=range_a1) .execute() ) # Extract raw values raw_values: List[List[Any]] = result.get("values", []) # Convert all values to strings for consistency string_values: List[List[str]] = [] for row in raw_values: string_row = [str(cell) if cell is not None else "" for cell in row] string_values.append(string_row) # Parse sheet name from range if "!" in range_a1: sheet_name = range_a1.split("!", 1)[0] else: sheet_name = "Sheet1" # Default # Handle headers headers: List[str] = [] data_values = string_values if include_headers and string_values: headers = string_values[0] data_values = string_values[1:] sheet_data = SheetData( spreadsheet_id=spreadsheet_id, sheet_name=sheet_name, range_read=range_a1, values=data_values, headers=headers, metadata={ "include_headers": include_headers, "raw_row_count": len(raw_values), "processed_row_count": len(data_values), }, ) if progress_callback: progress_callback( f"Read {len(data_values)} rows from {sheet_name} " f"({len(headers)} headers)" if headers else "(no headers)" ) self.logger.info( f"Successfully read range {range_a1}: {len(data_values)} data rows, " f"{len(headers)} headers" ) return sheet_data except HttpError as e: error_msg = f"Google Sheets API error reading range {range_a1}: {e}" self.logger.error(error_msg) raise GoogleSheetsError(error_msg) from e
[docs] def read_sheet( self, spreadsheet_id: str, sheet_name: str, start_row: int = 1, end_row: Optional[int] = None, start_col: int = 1, end_col: Optional[int] = None, include_headers: bool = True, progress_callback: Optional[ProgressCallback] = None, ) -> SheetData: """Read data from a sheet with row/column bounds. Args: spreadsheet_id: ID of the Google Spreadsheet sheet_name: Name of the sheet to read start_row: Starting row (1-based, default: 1) end_row: Ending row (1-based, optional) start_col: Starting column (1-based, default: 1) end_col: Ending column (1-based, optional) include_headers: Whether first row should be treated as headers progress_callback: Optional callback for progress updates Returns: SheetData object with the read data Raises: GoogleSheetsError: If API call fails """ # Create range object and convert to A1 notation range_obj = SheetRange( sheet_name=sheet_name, start_row=start_row, start_col=start_col, end_row=end_row, end_col=end_col, ) return self.read_range( spreadsheet_id=spreadsheet_id, range_a1=range_obj.a1_notation, include_headers=include_headers, progress_callback=progress_callback, )
[docs] def read_all_data( self, spreadsheet_id: str, sheet_name: str, include_headers: bool = True, progress_callback: Optional[ProgressCallback] = None, ) -> SheetData: """Read all data from a sheet. Args: spreadsheet_id: ID of the Google Spreadsheet sheet_name: Name of the sheet to read include_headers: Whether first row should be treated as headers progress_callback: Optional callback for progress updates Returns: SheetData object with all data from the sheet Raises: GoogleSheetsError: If API call fails """ # Use sheet name only to get all data range_a1 = sheet_name return self.read_range( spreadsheet_id=spreadsheet_id, range_a1=range_a1, include_headers=include_headers, progress_callback=progress_callback, )
[docs] def get_sheet_names( self, spreadsheet_id: str, progress_callback: Optional[ProgressCallback] = None, ) -> List[str]: """Get list of sheet names in a spreadsheet. Args: spreadsheet_id: ID of the Google Spreadsheet progress_callback: Optional callback for progress updates Returns: List of sheet names Raises: GoogleSheetsError: If API call fails """ info = self.get_spreadsheet_info(spreadsheet_id, progress_callback) return info.sheet_names
[docs] def validate_range( self, spreadsheet_id: str, range_a1: str, progress_callback: Optional[ProgressCallback] = None, ) -> bool: """Validate if a range exists and is accessible. Args: spreadsheet_id: ID of the Google Spreadsheet range_a1: Range in A1 notation progress_callback: Optional callback for progress updates Returns: True if range is valid and accessible, False otherwise """ try: # Attempt to read just the first cell to validate range self.read_range( spreadsheet_id=spreadsheet_id, range_a1=range_a1, include_headers=False, progress_callback=progress_callback, ) return True except GoogleSheetsError: return False