Source code for aipype_g.tasklib.google_sheets_models

"""Google Sheets data models for structured sheet data representation."""

from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any, Union, Callable


[docs] @dataclass class SheetRange: """Represents a range within a Google Sheet.""" sheet_name: str start_row: int start_col: int end_row: Optional[int] = None end_col: Optional[int] = None @property def a1_notation(self) -> str: """Convert to A1 notation (e.g., 'Sheet1!A1:C10').""" start_col_letter = self._col_num_to_letter(self.start_col) if self.end_row is None and self.end_col is None: # Single cell range_str = f"{start_col_letter}{self.start_row}" elif self.end_row is None: # Single row, multiple columns end_col_letter = self._col_num_to_letter(self.end_col or self.start_col) range_str = ( f"{start_col_letter}{self.start_row}:{end_col_letter}{self.start_row}" ) elif self.end_col is None: # Single column, multiple rows range_str = ( f"{start_col_letter}{self.start_row}:{start_col_letter}{self.end_row}" ) else: # Full range end_col_letter = self._col_num_to_letter(self.end_col) range_str = ( f"{start_col_letter}{self.start_row}:{end_col_letter}{self.end_row}" ) return f"{self.sheet_name}!{range_str}" @staticmethod def _col_num_to_letter(col_num: int) -> str: """Convert column number (1-based) to letter (A, B, C, ..., AA, AB, etc.).""" result = "" while col_num > 0: col_num -= 1 result = chr(65 + (col_num % 26)) + result col_num //= 26 return result
[docs] @staticmethod def from_a1_notation(a1_range: str) -> "SheetRange": """Create SheetRange from A1 notation (e.g., 'Sheet1!A1:C10').""" if "!" in a1_range: sheet_name, range_part = a1_range.split("!", 1) else: sheet_name = "Sheet1" # Default sheet name range_part = a1_range if ":" in range_part: start_cell, end_cell = range_part.split(":", 1) else: start_cell = range_part end_cell = None # Parse start cell start_col, start_row = SheetRange._parse_cell(start_cell) # Parse end cell if present if end_cell: end_col, end_row = SheetRange._parse_cell(end_cell) else: end_col, end_row = None, None return SheetRange( sheet_name=sheet_name, start_row=start_row, start_col=start_col, end_row=end_row, end_col=end_col, )
@staticmethod def _parse_cell(cell: str) -> tuple[int, int]: """Parse cell reference like 'A1' into (col_num, row_num).""" col_letters = "" row_digits = "" for char in cell: if char.isalpha(): col_letters += char.upper() elif char.isdigit(): row_digits += char # Convert column letters to number col_num = 0 for char in col_letters: col_num = col_num * 26 + (ord(char) - ord("A") + 1) return col_num, int(row_digits)
[docs] @dataclass class SheetData: """Represents data read from a Google Sheet.""" spreadsheet_id: str sheet_name: str range_read: str # Pyright incorrectly reports these as partially unknown despite explicit type annotations # These are legitimate dataclass fields with proper generic type parameters values: List[List[str]] = field(default_factory=list) # pyright: ignore[reportUnknownVariableType] headers: List[str] = field(default_factory=list) # pyright: ignore[reportUnknownVariableType] metadata: Dict[str, Any] = field(default_factory=dict) # pyright: ignore[reportUnknownVariableType] @property def num_rows(self) -> int: """Get number of data rows (excluding headers if present).""" return len(self.values) @property def num_cols(self) -> int: """Get number of columns.""" return len(self.values[0]) if self.values else 0 @property def has_headers(self) -> bool: """Check if headers are defined.""" return len(self.headers) > 0 @property def shape(self) -> tuple[int, int]: """Get shape as (rows, cols).""" return self.num_rows, self.num_cols
[docs] def get_column(self, col_index: int) -> List[str]: """Get values from a specific column. Args: col_index: 0-based column index Returns: List of values in the column """ if col_index >= self.num_cols: return [] return [row[col_index] if col_index < len(row) else "" for row in self.values]
[docs] def get_row(self, row_index: int) -> List[str]: """Get values from a specific row. Args: row_index: 0-based row index Returns: List of values in the row """ if row_index >= self.num_rows: return [] return self.values[row_index].copy()
[docs] def get_cell(self, row_index: int, col_index: int) -> str: """Get value from a specific cell. Args: row_index: 0-based row index col_index: 0-based column index Returns: Cell value as string, empty string if out of bounds """ if row_index >= self.num_rows or col_index >= len(self.values[row_index]): return "" return self.values[row_index][col_index]
[docs] def to_dict_list(self) -> List[Dict[str, str]]: """Convert to list of dictionaries using headers as keys. Returns: List of dictionaries, one per data row Raises: ValueError: If no headers are defined """ if not self.has_headers: raise ValueError("Cannot convert to dict list without headers") # Explicit type annotations for type checker result: List[Dict[str, str]] = [] for row in self.values: row_dict: Dict[str, str] = {} for i, header in enumerate(self.headers): row_dict[header] = row[i] if i < len(row) else "" result.append(row_dict) return result
[docs] def filter_rows(self, condition_func: Callable[[List[str]], bool]) -> "SheetData": """Filter rows based on a condition function. Args: condition_func: Function that takes a row (List[str]) and returns bool Returns: New SheetData with filtered rows """ filtered_values = [row for row in self.values if condition_func(row)] return SheetData( spreadsheet_id=self.spreadsheet_id, sheet_name=self.sheet_name, range_read=self.range_read, values=filtered_values, headers=self.headers.copy(), metadata=self.metadata.copy(), )
[docs] def to_dict(self) -> Dict[str, Any]: """Convert to dictionary representation.""" return { "spreadsheet_id": self.spreadsheet_id, "sheet_name": self.sheet_name, "range_read": self.range_read, "values": self.values, "headers": self.headers, "num_rows": self.num_rows, "num_cols": self.num_cols, "shape": self.shape, "has_headers": self.has_headers, "metadata": self.metadata, }
[docs] @dataclass class SpreadsheetInfo: """Represents information about a Google Spreadsheet.""" spreadsheet_id: str title: str # Pyright incorrectly reports these as partially unknown despite explicit type annotations # These are legitimate dataclass fields with proper generic type parameters sheet_names: List[str] = field(default_factory=list) # pyright: ignore[reportUnknownVariableType] properties: Dict[str, Any] = field(default_factory=dict) # pyright: ignore[reportUnknownVariableType] @property def num_sheets(self) -> int: """Get number of sheets in the spreadsheet.""" return len(self.sheet_names)
[docs] def has_sheet(self, sheet_name: str) -> bool: """Check if a sheet exists. Args: sheet_name: Name of the sheet to check Returns: True if sheet exists, False otherwise """ return sheet_name in self.sheet_names
[docs] def to_dict(self) -> Dict[str, Any]: """Convert to dictionary representation.""" return { "spreadsheet_id": self.spreadsheet_id, "title": self.title, "sheet_names": self.sheet_names, "num_sheets": self.num_sheets, "properties": self.properties, }
# Type aliases for Google Sheets API responses SheetsApiResponse = Dict[str, Any] SheetValues = List[List[Union[str, int, float]]]