Coverage for coffee_maker/auto_gemini_styleguide.py: 73%
224 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-21 05:58 +0000
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-21 05:58 +0000
1# co-author: Gemini Code Assist
2import argparse
3import difflib # For generating diffs
4import logging
5import os
6import pathlib
8import google.api_core.exceptions # Added: For GoogleAPICallError
9import google.generativeai as genai
10import google.generativeai.types # Added: For BlockedPromptException
11from dotenv import load_dotenv
# --- Configuration ---
# Relative path to the style guide from the script's location or project root
DEFAULT_STYLEGUIDE_PATH = ".gemini/styleguide.md"
# Relative path to the .env file
DEFAULT_ENV_FILE_PATH = ".env"
# Environment variable name for the API key
API_KEY_ENV_VAR = "COFFEE_MAKER_GEMINI_API_KEY"  # As per your preference

# Delimiters for parsing LLM response.
# These exact strings are interpolated into the prompt (construct_llm_prompt)
# and searched for in the reply (parse_llm_response) — the two must stay in sync.
MODIFIED_CODE_DELIMITER_START = "---MODIFIED_CODE_START---"
MODIFIED_CODE_DELIMITER_END = "---MODIFIED_CODE_END---"
EXPLANATIONS_DELIMITER_START = "---EXPLANATIONS_START---"
EXPLANATIONS_DELIMITER_END = "---EXPLANATIONS_END---"
def load_api_key(env_file_path: str, let_load_dotenv_search: bool = True) -> str | None:
    """Load the Google API key from a .env file or environment variables.

    Args:
        env_file_path (str): The path to the .env file to source first.
        let_load_dotenv_search (bool): If True, and the key is not found via
            `env_file_path` or system variables, `load_dotenv()` will search
            default locations (e.g. a ./.env relative to the CWD).

    Returns:
        str | None: The API key if found, otherwise None.
    """
    if pathlib.Path(env_file_path).is_file():
        # Lazy %-style args: the message is only formatted if the level is enabled.
        logging.info("Sourcing environment variables from '%s'...", env_file_path)
        # load_dotenv expects a string or Path-like object for dotenv_path;
        # override=True lets the file win over pre-existing environment values.
        load_dotenv(dotenv_path=env_file_path, override=True)
    else:
        logging.info(
            "Environment file '%s' not found. Checking system environment variables.",
            env_file_path,
        )

    api_key = os.getenv(API_KEY_ENV_VAR)
    if not api_key:
        # This is a warning (not an error yet): the load_dotenv() fallback below
        # may still locate the key.  The old message's "Error:" prefix was
        # misleading at WARNING level.
        logging.warning(
            "API key not found. Please set the %s environment variable or provide it in '%s'.",
            API_KEY_ENV_VAR,
            env_file_path,
        )
        logging.info("You can get an API key from Google AI Studio (https://aistudio.google.com/app/apikey).")

        # Fallback: let python-dotenv search its default locations.
        if let_load_dotenv_search and load_dotenv():
            api_key = os.getenv(API_KEY_ENV_VAR)
            if not api_key:
                logging.error(
                    "We tried load_dotenv() (which searches default locations like .env) "
                    "without specifying env_file_path, but could not find the API key."
                )
                return None
            logging.info("%s was found by load_dotenv.", API_KEY_ENV_VAR)

    return api_key
65def read_file_content(file_path: str) -> str | None:
66 """Reads the content of a file."""
67 try:
68 with open(file_path, "r", encoding="utf-8") as f:
69 return f.read()
70 except FileNotFoundError:
71 logging.error(f"Error: File not found at '{file_path}'.")
72 return None
73 except Exception as e:
74 logging.exception(f"Error reading file '{file_path}': {e}")
75 return None
def write_file_content(file_path: str, content: str) -> bool:
    """Overwrite ``file_path`` with ``content``; return True on success."""
    try:
        with open(file_path, "w", encoding="utf-8") as handle:
            handle.write(content)
            logging.info(f"Successfully updated '{file_path}'.")
            return True
    except Exception as exc:
        logging.exception(f"Error writing to file '{file_path}': {exc}")
        return False
def construct_llm_prompt(style_guide_content: str, code_to_modify: str, file_name: str) -> str:
    """Construct the prompt for the LLM, asking for modified code and explanations.

    Args:
        style_guide_content: Full text of the style guide to embed in the prompt.
        code_to_modify: Source code the model should restyle (logic must not change).
        file_name: Name of the file the code came from, shown to the model.

    Returns:
        The complete prompt string.  It instructs the model to answer in two
        delimiter-framed sections (modified code, then explanations) so the
        reply can be split mechanically by parse_llm_response().
    """
    # NOTE: the delimiter constants interpolated below must match exactly what
    # parse_llm_response() searches for.
    prompt = f"""You are an expert code formatting and styling assistant.
Your task is to take the provided code snippet and reformat/restyle it to adhere (with minimal changes : don't change the code logic) to the rules outlined in the "STYLE GUIDE" below.
The code is from the file named '{file_name}'.

Your response MUST be structured in two parts, using the exact delimiters provided:

Part 1: The Modified Code
Begin this part with the delimiter "{MODIFIED_CODE_DELIMITER_START}" on a new line.
Provide ONLY the fully modified code. Do not include any explanations, apologies, or introductory sentences within this code block.
End this part with the delimiter "{MODIFIED_CODE_DELIMITER_END}" on a new line.

Part 2: Explanations for Changes
Begin this part with the delimiter "{EXPLANATIONS_DELIMITER_START}" on a new line.
List the significant changes you made to the code and briefly explain why each change was made, referencing the "STYLE GUIDE" rules where applicable.
If no changes were made, state "No changes were necessary."
End this part with the delimiter "{EXPLANATIONS_DELIMITER_END}" on a new line.

Example of your response structure:
{MODIFIED_CODE_DELIMITER_START}
# ... your modified code here ...
{MODIFIED_CODE_DELIMITER_END}
{EXPLANATIONS_DELIMITER_START}
- Line X: Changed Y to Z because of style guide rule A.1 (e.g., line length).
- Line Y: Refactored function F for clarity as per style guide section B (e.g., readability).
{EXPLANATIONS_DELIMITER_END}

STYLE GUIDE:
---
{style_guide_content}
---

ORIGINAL CODE from '{file_name}':
---
{code_to_modify}
---

Now, provide your response following the structure above.
"""
    return prompt
def parse_llm_response(llm_full_response: str) -> tuple[str | None, str | None]:
    """Parse the LLM's response to extract modified code and explanations.

    Expected layout (as requested by construct_llm_prompt): code between
    MODIFIED_CODE_DELIMITER_START/END, then explanations between
    EXPLANATIONS_DELIMITER_START/END.  Several malformed layouts are
    tolerated: an unterminated explanation block, a missing explanation
    block, a missing code-end delimiter, or no delimiters at all (in which
    case the entire reply is treated as code).

    Args:
        llm_full_response: Raw text returned by the model.

    Returns:
        A (modified_code, explanations) tuple; either element is None when
        that section could not be recovered, and (None, None) on any
        unexpected parsing exception.
    """
    modified_code = None
    explanations = None
    logging.debug(f"PARSER: Received LLM response length: {len(llm_full_response)}")

    try:
        # Find the primary delimiters that separate the main sections
        idx_code_start_delimiter = llm_full_response.find(MODIFIED_CODE_DELIMITER_START)
        idx_explanation_start_delimiter = llm_full_response.find(EXPLANATIONS_DELIMITER_START)
        idx_explanation_end_delimiter = llm_full_response.find(EXPLANATIONS_DELIMITER_END)

        # --- Extract Explanations First ---
        # This is often more straightforward if the AI terminates it correctly.
        if (
            idx_explanation_start_delimiter != -1
            and idx_explanation_end_delimiter != -1
            and idx_explanation_start_delimiter < idx_explanation_end_delimiter
        ):
            start_of_explanation_payload = idx_explanation_start_delimiter + len(EXPLANATIONS_DELIMITER_START)
            explanations = llm_full_response[start_of_explanation_payload:idx_explanation_end_delimiter].strip()
        elif idx_explanation_start_delimiter != -1:  # Start found, but no end
            logging.warning(
                f"PARSER: Found '{EXPLANATIONS_DELIMITER_START}' but no matching '{EXPLANATIONS_DELIMITER_END}'. Explanation block might be unterminated."
            )
            explanations = llm_full_response[
                idx_explanation_start_delimiter + len(EXPLANATIONS_DELIMITER_START) :
            ].strip()  # Take to end
        else:
            logging.debug(  # Changed to debug, as this is expected for malformed
                f"PARSER: Could not find explanation block delimiters ('{EXPLANATIONS_DELIMITER_START}', '{EXPLANATIONS_DELIMITER_END}')."
            )

        # --- Extract Modified Code ---
        if idx_code_start_delimiter != -1:
            start_of_code_payload = idx_code_start_delimiter + len(MODIFIED_CODE_DELIMITER_START)
            end_of_ai_code_block_boundary = -1

            if idx_explanation_start_delimiter != -1 and idx_explanation_start_delimiter > start_of_code_payload:
                # Code ends right before explanations start
                end_of_ai_code_block_boundary = idx_explanation_start_delimiter
            else:
                # No explanation block after code start, or malformed.
                # Look for the AI's intended MODIFIED_CODE_DELIMITER_END after the code start.
                # rfind: use the LAST occurrence, in case the code itself quotes the delimiter.
                end_of_ai_code_block_boundary = llm_full_response.rfind(
                    MODIFIED_CODE_DELIMITER_END, start_of_code_payload
                )
                if end_of_ai_code_block_boundary == -1:  # MCE not found after MCS
                    # If no explanation start and no MCE after code start, assume code goes to end.
                    end_of_ai_code_block_boundary = len(llm_full_response)
                    logging.warning(
                        f"PARSER: No '{EXPLANATIONS_DELIMITER_START}' found after code, and no '{MODIFIED_CODE_DELIMITER_END}' found after code start. Assuming code extends to end of response."
                    )

            if start_of_code_payload < end_of_ai_code_block_boundary:
                # This segment is what the AI considers its code output, potentially ending with its own MODIFIED_CODE_DELIMITER_END
                ai_code_output_segment = llm_full_response[start_of_code_payload:end_of_ai_code_block_boundary]
                stripped_ai_code_segment = ai_code_output_segment.rstrip()

                # Now, remove the AI's *actual* MODIFIED_CODE_DELIMITER_END from the end of this segment
                if stripped_ai_code_segment.endswith(MODIFIED_CODE_DELIMITER_END):
                    modified_code = stripped_ai_code_segment[: -len(MODIFIED_CODE_DELIMITER_END)].strip()
                else:
                    logging.warning(
                        f"PARSER: AI's code output segment (len {len(stripped_ai_code_segment)}) did not end with '{MODIFIED_CODE_DELIMITER_END}'. "
                        f"Segment tail (last 50 chars): '{repr(stripped_ai_code_segment[-50:])}'. Using segment as is (after stripping)."
                    )
                    modified_code = stripped_ai_code_segment.strip()  # Use the segment as is, but stripped
            else:
                logging.warning(
                    f"PARSER: Code start payload index ({start_of_code_payload}) not before code end boundary ({end_of_ai_code_block_boundary}). Cannot extract code."
                )

        # --- Fallback for completely missing delimiters ---
        # If after all attempts, modified_code is still None and explanations is None,
        # and the original response wasn't empty, assume it's a completely malformed response
        # and treat the whole thing as code.
        if modified_code is None and explanations is None and llm_full_response.strip():
            all_delimiters_missing = all(
                delim not in llm_full_response
                for delim in [
                    MODIFIED_CODE_DELIMITER_START,
                    MODIFIED_CODE_DELIMITER_END,
                    EXPLANATIONS_DELIMITER_START,
                    EXPLANATIONS_DELIMITER_END,
                ]
            )
            if all_delimiters_missing:
                logging.warning("PARSER: No delimiters found anywhere. Treating entire response as modified code.")
                modified_code = llm_full_response.strip()
            else:
                # This case means some delimiters were found, but the structure didn't fit any parsing logic.
                # modified_code and explanations remain None.
                logging.warning("PARSER: Some delimiters found, but structure is unexpected. Cannot reliably parse.")

    except Exception as e:
        logging.exception(f"PARSER: Error during LLM response parsing: {e}")
        # Ensure None is returned on exception
        return None, None

    logging.debug(f"PARSER: Final modified_code (first 100): {repr(modified_code[:100]) if modified_code else 'None'}")
    logging.debug(f"PARSER: Final explanations (first 100): {repr(explanations[:100]) if explanations else 'None'}")
    return modified_code, explanations
def get_ai_suggestion(api_key: str, model_name: str, prompt: str) -> tuple[str | None, str | None]:
    """Call the Gemini API and get the modified code and explanations.

    Args:
        api_key: Google AI Studio API key used to configure the client.
        model_name: Gemini model identifier (e.g. 'gemini-2.0-flash-lite').
        prompt: Full prompt built by construct_llm_prompt().

    Returns:
        The (modified_code, explanations) tuple from parse_llm_response(),
        or (None, None) on an empty response, a blocked prompt, or any
        API/unexpected error.
    """
    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(model_name)
        logging.info(f"Sending request to Gemini model '{model_name}'...")

        # Single candidate at low temperature: we want a deterministic-ish
        # restyle, not creative variation.
        generation_config = genai.types.GenerationConfig(candidate_count=1, temperature=0.1)

        response = model.generate_content(prompt, generation_config=generation_config)

        # An empty candidates list typically means the response was filtered;
        # prompt_feedback (when present) explains why.
        if not response.candidates or not response.candidates[0].content.parts:
            logging.error("Error: Model did not return any content.")
            if response.prompt_feedback:
                logging.info(f"Prompt Feedback: {response.prompt_feedback}")
            return None, None

        full_llm_output = response.text

        # --- DEBUGGING: Print raw LLM response ---
        logging.debug("\n" + "=" * 20 + " RAW LLM RESPONSE " + "=" * 20)
        logging.debug(f"Raw LLM Output Length: {len(full_llm_output)}")
        # logging.debug(full_llm_output)  # Uncomment to see the full raw output if needed
        # For very detailed inspection of potential hidden characters:
        # logging.debug("Representation of RAW LLM response (first 500 chars):")
        # logging.debug(repr(full_llm_output[:500]))
        # logging.debug("Representation of RAW LLM response (last 500 chars):")
        # logging.debug(repr(full_llm_output[-500:]))
        logging.debug("=" * (40 + len(" RAW LLM RESPONSE ")) + "\n")
        # --- END DEBUGGING ---

        return parse_llm_response(full_llm_output)

    except google.generativeai.types.BlockedPromptException as bpe:
        logging.error(f"Gemini API Error: Prompt was blocked. {bpe}")
        # Potentially log bpe.response.prompt_feedback if available and relevant
        return None, None
    except google.api_core.exceptions.GoogleAPICallError as api_error:
        logging.error(f"Google API Call Error: {api_error}")
        # api_error often has structured details like api_error.code() or api_error.message
        return None, None
    except Exception as e:
        logging.exception(f"Unexpected error calling Gemini API: {e}")
        # Note: The specific Google API exceptions inherit from Exception, so this will catch them too,
        # but the more specific handlers above allow for different logging/handling if needed.
        # If you remove the specific handlers, this one will catch them.
        # NOTE(review): the .response attribute check below presumes an
        # HTTP-style error object (status_code/text) — confirm it matches the
        # SDK's actual exception shape.
        if hasattr(e, "response") and e.response:
            logging.error(f"API Response Status: {e.response.status_code}")
            logging.error(f"API Response Body: {e.response.text}")
        return None, None
290def generate_and_write_diff(
291 original_content: str, modified_content: str, target_file_path: str, explanations: str | None
292) -> bool:
293 """
294 Generates a diff and writes it to a .diff.<filename> file if actual code changes exist.
295 Explanations are included in the diff file if changes were made, or logged if no code changes.
296 """
297 original_filename = os.path.basename(target_file_path)
298 diff_filename = f".diff.{original_filename}"
299 diff_file_path = os.path.join(os.path.dirname(target_file_path), diff_filename)
301 # Primary condition: Only create a diff file if code content has actually changed.
302 # Use strip() to ignore leading/trailing whitespace differences
303 if original_content.strip() == modified_content.strip():
304 logging.info("Code content is identical to the original after stripping whitespace.")
305 if explanations:
306 # Log explanations, but do not create the diff file for the code.
307 logging.info(f"AI provided explanations for no code change:\n{explanations}")
308 else:
309 logging.info("No explanations provided for identical code.")
310 logging.info(f"Diff file '{diff_file_path}' will NOT be created as there are no actual code changes.")
311 return True # Operation considered successful, but no diff file generated for code.
313 # If we reach here, original_content.strip() != modified_content.strip(), so there are changes.
314 logging.info("Code content has changed. Generating diff file.")
316 original_lines = original_content.splitlines(keepends=True)
317 modified_lines = modified_content.splitlines(keepends=True)
319 # Create a unified diff
320 diff_generator = difflib.unified_diff(
321 original_lines,
322 modified_lines,
323 fromfile=f"a/{original_filename}",
324 tofile=f"b/{original_filename}",
325 lineterm="", # Avoids extra newlines if source lines already have them
326 )
328 diff_content_list = list(diff_generator)
330 # Although we checked strip(), difflib might still produce an empty list if changes are only whitespace/newlines
331 # or if there's some other subtle difference it doesn't represent in the diff format.
332 # We should still write the file if we reached this point based on the strip() check,
333 # but maybe add a note if the diff_content_list is empty unexpectedly.
335 try:
336 with open(diff_file_path, "w", encoding="utf-8") as f:
337 f.write(f"# Diff for {original_filename} (AI Suggested Changes)\n")
338 f.write("# Generated by auto_gemini_styleguide.py\n")
339 f.write("-" * 30 + " GIT-STYLE UNIFIED DIFF " + "-" * 30 + "\n")
341 if not diff_content_list:
342 logging.warning(
343 "Difflib generated an empty diff list, but content comparison (strip) indicated a difference. This is unusual."
344 )
345 f.write("--- Difflib reported no changes, but content comparison (strip) differed. ---\n")
346 else:
347 for line in diff_content_list:
348 f.write(line)
350 if explanations:
351 f.write("\n\n" + "-" * 30 + " AI EXPLANATIONS FOR CHANGES " + "-" * 30 + "\n")
352 f.write(explanations + "\n")
353 else:
354 f.write("\n\n" + "-" * 30 + " AI EXPLANATIONS FOR CHANGES " + "-" * 30 + "\n")
355 f.write("No specific explanations were provided by the AI for these changes.\n")
357 logging.info(f"Successfully wrote diff and explanations to '{diff_file_path}'.")
358 return True
359 except Exception as e:
360 logging.exception(f"Error writing to diff file '{diff_file_path}': {e}")
361 return False
def main():
    """Main function to autocorrect a file using Google AI and generate a diff.

    Workflow: parse CLI args -> load API key -> read style guide and target
    file -> (optional) back up the target -> build prompt -> call Gemini ->
    write diff and, unless --no-modify, overwrite the target file.

    Returns:
        int: 0 on success, 1 on any failure (missing key, unreadable inputs,
        no valid AI code block, or failed write-back).
    """
    # Configure logging at the beginning of main or at module level
    # Set level to DEBUG to see all parser logs and raw LLM response
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s",
    )

    parser = argparse.ArgumentParser(
        description="Autocorrects a file using Google AI according to a style guide and generates a diff with explanations.",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument("target_file_path", help="The path to the Python file to autocorrect.")
    parser.add_argument(
        "--styleguide",
        default=DEFAULT_STYLEGUIDE_PATH,
        help=f"Path to the style guide markdown file (default: {DEFAULT_STYLEGUIDE_PATH}).",
    )
    parser.add_argument(
        "--envfile",
        default=DEFAULT_ENV_FILE_PATH,
        help=f"Path to the .env file for API key (default: {DEFAULT_ENV_FILE_PATH}).",
    )
    parser.add_argument(
        "--model",
        default="gemini-2.0-flash-lite",
        help="The Gemini model to use (e.g., 'gemini-2.0-flash-lite', 'gemini-1.5-flash-latest', 'gemini-pro').",
    )
    parser.add_argument(
        "--backup", action="store_true", help="Create a backup of the original file (as .bak) before overwriting."
    )
    parser.add_argument(
        "--no-modify", action="store_true", help="Do not modify the original file. Only generate the .diff file."
    )
    # Added --debug argument to control logging level from CLI
    parser.add_argument("--debug", action="store_true", help="Enable debug logging for more verbose output.")

    args = parser.parse_args()

    # Set logging level based on --debug argument
    # This overrides the basicConfig level if --debug is not used
    # If --debug is used, basicConfig already set it to DEBUG
    if not args.debug:
        logging.getLogger().setLevel(logging.INFO)

    logging.info("--- AI Code Style Corrector & Differ ---")

    api_key = load_api_key(args.envfile)
    if not api_key:
        return 1

    logging.info(f"Reading style guide from: {args.styleguide}")
    style_guide_content = read_file_content(args.styleguide)
    if style_guide_content is None:
        return 1

    logging.info(f"Reading target file: {args.target_file_path}")
    original_code_content = read_file_content(args.target_file_path)
    if original_code_content is None:
        return 1

    if args.backup:
        backup_file_path = f"{args.target_file_path}.bak"
        logging.info(f"Creating backup: {backup_file_path}")
        if not write_file_content(backup_file_path, original_code_content):
            # Backup failure is non-fatal by design: warn and continue.
            logging.warning("Warning: Failed to create backup. Proceeding cautiously.")

    prompt = construct_llm_prompt(style_guide_content, original_code_content, pathlib.Path(args.target_file_path).name)

    modified_code, explanations = get_ai_suggestion(api_key, args.model, prompt)

    if modified_code is not None:
        logging.info("--- AI Suggestion Received ---")

        # Decide if we should process changes (generate diff, potentially modify file)
        # Process if code changed OR if explanations were provided (even if code didn't change)
        should_process_changes = modified_code.strip() != original_code_content.strip() or bool(explanations)

        if should_process_changes:
            # generate_and_write_diff will internally decide if a diff FILE is created
            # based on whether the code content actually changed.
            generate_and_write_diff(original_code_content, modified_code, args.target_file_path, explanations)

            if args.no_modify:
                logging.info(f"Original file '{args.target_file_path}' was NOT modified due to --no-modify flag.")
            elif modified_code.strip() == original_code_content.strip():
                # This case is hit if AI returned identical code but provided explanations.
                # generate_and_write_diff logged the explanations and skipped diff file creation.
                logging.info("AI returned identical code content. No changes made to the original file based on code.")
            else:
                # Code content actually changed, proceed with writing the modified file
                logging.info(f"Attempting to write AI modified code back to '{args.target_file_path}'...")
                logging.info("IMPORTANT: Please review the changes carefully after the script finishes.")
                if write_file_content(args.target_file_path, modified_code):
                    logging.info("Original file successfully updated with AI suggestions.")
                else:
                    logging.error("Failed to write modified code to the original file.")
                    return 1
        else:
            # modified_code is not None, but code is identical AND no explanations were provided.
            # This is the case where AI returned the exact same code and nothing else.
            logging.info("AI returned identical code content and no explanations. No changes made, no diff generated.")

        logging.info("Process completed.")
    else:
        # This means get_ai_suggestion returned (None, explanations) or (None, None)
        logging.error(
            "Failed to get a valid modified code block from the AI. No changes made to the file. No diff generated."
        )
        if explanations:  # If explanations were returned but code was None
            logging.info("AI provided explanations, but no valid code block was parsed:")
            logging.info("-" * 20 + " EXPLANATIONS " + "-" * 20)
            logging.info(explanations)  # Log the explanations
            logging.info("-" * (40 + len(" EXPLANATIONS ")) + "\n")
        return 1

    return 0
if __name__ == "__main__":
    import sys

    # Propagate main()'s return value as the process exit status.
    sys.exit(main())