Coverage for coffee_maker/auto_gemini_styleguide.py: 73%
224 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-21 05:58 +0000
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-21 05:58 +0000
1# co-author: Gemini Code Assist
2import argparse
3import difflib # For generating diffs
4import logging
5import os
6import pathlib
8import google.api_core.exceptions # Added: For GoogleAPICallError
9import google.generativeai as genai
10import google.generativeai.types # Added: For BlockedPromptException
11from dotenv import load_dotenv
# --- Configuration ---
# Relative path to the style guide from the script's location or project root
DEFAULT_STYLEGUIDE_PATH = ".gemini/styleguide.md"
# Relative path to the .env file
DEFAULT_ENV_FILE_PATH = ".env"
# Environment variable name for the API key
API_KEY_ENV_VAR = "COFFEE_MAKER_GEMINI_API_KEY"  # As per your preference

# Delimiters for parsing LLM response.
# These exact strings are interpolated into the prompt (construct_llm_prompt)
# and searched for in the reply (parse_llm_response) — the two must stay in sync.
MODIFIED_CODE_DELIMITER_START = "---MODIFIED_CODE_START---"
MODIFIED_CODE_DELIMITER_END = "---MODIFIED_CODE_END---"
EXPLANATIONS_DELIMITER_START = "---EXPLANATIONS_START---"
EXPLANATIONS_DELIMITER_END = "---EXPLANATIONS_END---"
def load_api_key(env_file_path: str, let_load_dotenv_search: bool = True) -> str | None:
    """Load the Google API key from a .env file or environment variables.

    Args:
        env_file_path (str): The path to the .env file to source first.
        let_load_dotenv_search (bool): If True, and the key is not found via
            `env_file_path` or system variables, `load_dotenv()` will search
            default locations (e.g. a ./.env relative to the CWD).

    Returns:
        str | None: The API key if found, otherwise None.
    """
    if pathlib.Path(env_file_path).is_file():
        # Lazy %-style args: the message is only formatted if the level is enabled.
        logging.info("Sourcing environment variables from '%s'...", env_file_path)
        # load_dotenv expects a string or Path-like object for dotenv_path;
        # override=True lets the file win over pre-existing environment values.
        load_dotenv(dotenv_path=env_file_path, override=True)
    else:
        logging.info(
            "Environment file '%s' not found. Checking system environment variables.",
            env_file_path,
        )

    api_key = os.getenv(API_KEY_ENV_VAR)
    if not api_key:
        # This is a warning (not an error yet): the load_dotenv() fallback below
        # may still locate the key.  The old message's "Error:" prefix was
        # misleading at WARNING level.
        logging.warning(
            "API key not found. Please set the %s environment variable or provide it in '%s'.",
            API_KEY_ENV_VAR,
            env_file_path,
        )
        logging.info("You can get an API key from Google AI Studio (https://aistudio.google.com/app/apikey).")

        # Fallback: let python-dotenv search its default locations.
        if let_load_dotenv_search and load_dotenv():
            api_key = os.getenv(API_KEY_ENV_VAR)
            if not api_key:
                logging.error(
                    "We tried load_dotenv() (which searches default locations like .env) "
                    "without specifying env_file_path, but could not find the API key."
                )
                return None
            logging.info("%s was found by load_dotenv.", API_KEY_ENV_VAR)

    return api_key
65def read_file_content(file_path: str) -> str | None:
66 """Reads the content of a file."""
67 try:
68 with open(file_path, "r", encoding="utf-8") as f:
69 return f.read()
70 except FileNotFoundError:
71 logging.error(f"Error: File not found at '{file_path}'.")
72 return None
73 except Exception as e:
74 logging.exception(f"Error reading file '{file_path}': {e}")
75 return None
def write_file_content(file_path: str, content: str) -> bool:
    """Overwrite ``file_path`` with ``content``; return True on success."""
    try:
        with open(file_path, "w", encoding="utf-8") as handle:
            handle.write(content)
            logging.info(f"Successfully updated '{file_path}'.")
            return True
    except Exception as exc:
        logging.exception(f"Error writing to file '{file_path}': {exc}")
        return False
def construct_llm_prompt(style_guide_content: str, code_to_modify: str, file_name: str) -> str:
    """Construct the prompt for the LLM, asking for modified code and explanations.

    Args:
        style_guide_content: Full text of the style guide to embed in the prompt.
        code_to_modify: Source code the model should restyle (logic must not change).
        file_name: Name of the file the code came from, shown to the model.

    Returns:
        The complete prompt string.  It instructs the model to answer in two
        delimiter-framed sections (modified code, then explanations) so the
        reply can be split mechanically by parse_llm_response().
    """
    # NOTE: the delimiter constants interpolated below must match exactly what
    # parse_llm_response() searches for.
    prompt = f"""You are an expert code formatting and styling assistant.
Your task is to take the provided code snippet and reformat/restyle it to adhere (with minimal changes : don't change the code logic) to the rules outlined in the "STYLE GUIDE" below.
The code is from the file named '{file_name}'.

Your response MUST be structured in two parts, using the exact delimiters provided:

Part 1: The Modified Code
Begin this part with the delimiter "{MODIFIED_CODE_DELIMITER_START}" on a new line.
Provide ONLY the fully modified code. Do not include any explanations, apologies, or introductory sentences within this code block.
End this part with the delimiter "{MODIFIED_CODE_DELIMITER_END}" on a new line.

Part 2: Explanations for Changes
Begin this part with the delimiter "{EXPLANATIONS_DELIMITER_START}" on a new line.
List the significant changes you made to the code and briefly explain why each change was made, referencing the "STYLE GUIDE" rules where applicable.
If no changes were made, state "No changes were necessary."
End this part with the delimiter "{EXPLANATIONS_DELIMITER_END}" on a new line.

Example of your response structure:
{MODIFIED_CODE_DELIMITER_START}
# ... your modified code here ...
{MODIFIED_CODE_DELIMITER_END}
{EXPLANATIONS_DELIMITER_START}
- Line X: Changed Y to Z because of style guide rule A.1 (e.g., line length).
- Line Y: Refactored function F for clarity as per style guide section B (e.g., readability).
{EXPLANATIONS_DELIMITER_END}

STYLE GUIDE:
---
{style_guide_content}
---

ORIGINAL CODE from '{file_name}':
---
{code_to_modify}
---

Now, provide your response following the structure above.
"""
    return prompt
def parse_llm_response(llm_full_response: str) -> tuple[str | None, str | None]:
    """Parse the LLM's response to extract modified code and explanations.

    Expected layout (as requested by construct_llm_prompt): code between
    MODIFIED_CODE_DELIMITER_START/END, then explanations between
    EXPLANATIONS_DELIMITER_START/END.  Several malformed layouts are
    tolerated: an unterminated explanation block, a missing explanation
    block, a missing code-end delimiter, or no delimiters at all (in which
    case the entire reply is treated as code).

    Args:
        llm_full_response: Raw text returned by the model.

    Returns:
        A (modified_code, explanations) tuple; either element is None when
        that section could not be recovered, and (None, None) on any
        unexpected parsing exception.
    """
    modified_code = None
    explanations = None
    logging.debug(f"PARSER: Received LLM response length: {len(llm_full_response)}")

    try:
        # Find the primary delimiters that separate the main sections
        idx_code_start_delimiter = llm_full_response.find(MODIFIED_CODE_DELIMITER_START)
        idx_explanation_start_delimiter = llm_full_response.find(EXPLANATIONS_DELIMITER_START)
        idx_explanation_end_delimiter = llm_full_response.find(EXPLANATIONS_DELIMITER_END)

        # --- Extract Explanations First ---
        # This is often more straightforward if the AI terminates it correctly.
        if (
            idx_explanation_start_delimiter != -1
            and idx_explanation_end_delimiter != -1
            and idx_explanation_start_delimiter < idx_explanation_end_delimiter
        ):
            start_of_explanation_payload = idx_explanation_start_delimiter + len(EXPLANATIONS_DELIMITER_START)
            explanations = llm_full_response[start_of_explanation_payload:idx_explanation_end_delimiter].strip()
        elif idx_explanation_start_delimiter != -1:  # Start found, but no end
            logging.warning(
                f"PARSER: Found '{EXPLANATIONS_DELIMITER_START}' but no matching '{EXPLANATIONS_DELIMITER_END}'. Explanation block might be unterminated."
            )
            explanations = llm_full_response[
                idx_explanation_start_delimiter + len(EXPLANATIONS_DELIMITER_START) :
            ].strip()  # Take to end
        else:
            logging.debug(  # Changed to debug, as this is expected for malformed
                f"PARSER: Could not find explanation block delimiters ('{EXPLANATIONS_DELIMITER_START}', '{EXPLANATIONS_DELIMITER_END}')."
            )

        # --- Extract Modified Code ---
        if idx_code_start_delimiter != -1:
            start_of_code_payload = idx_code_start_delimiter + len(MODIFIED_CODE_DELIMITER_START)
            end_of_ai_code_block_boundary = -1

            if idx_explanation_start_delimiter != -1 and idx_explanation_start_delimiter > start_of_code_payload:
                # Code ends right before explanations start
                end_of_ai_code_block_boundary = idx_explanation_start_delimiter
            else:
                # No explanation block after code start, or malformed.
                # Look for the AI's intended MODIFIED_CODE_DELIMITER_END after the code start.
                # rfind: use the LAST occurrence, in case the code itself quotes the delimiter.
                end_of_ai_code_block_boundary = llm_full_response.rfind(
                    MODIFIED_CODE_DELIMITER_END, start_of_code_payload
                )
                if end_of_ai_code_block_boundary == -1:  # MCE not found after MCS
                    # If no explanation start and no MCE after code start, assume code goes to end.
                    end_of_ai_code_block_boundary = len(llm_full_response)
                    logging.warning(
                        f"PARSER: No '{EXPLANATIONS_DELIMITER_START}' found after code, and no '{MODIFIED_CODE_DELIMITER_END}' found after code start. Assuming code extends to end of response."
                    )

            if start_of_code_payload < end_of_ai_code_block_boundary:
                # This segment is what the AI considers its code output, potentially ending with its own MODIFIED_CODE_DELIMITER_END
                ai_code_output_segment = llm_full_response[start_of_code_payload:end_of_ai_code_block_boundary]
                stripped_ai_code_segment = ai_code_output_segment.rstrip()

                # Now, remove the AI's *actual* MODIFIED_CODE_DELIMITER_END from the end of this segment
                if stripped_ai_code_segment.endswith(MODIFIED_CODE_DELIMITER_END):
                    modified_code = stripped_ai_code_segment[: -len(MODIFIED_CODE_DELIMITER_END)].strip()
                else:
                    logging.warning(
                        f"PARSER: AI's code output segment (len {len(stripped_ai_code_segment)}) did not end with '{MODIFIED_CODE_DELIMITER_END}'. "
                        f"Segment tail (last 50 chars): '{repr(stripped_ai_code_segment[-50:])}'. Using segment as is (after stripping)."
                    )
                    modified_code = stripped_ai_code_segment.strip()  # Use the segment as is, but stripped
            else:
                logging.warning(
                    f"PARSER: Code start payload index ({start_of_code_payload}) not before code end boundary ({end_of_ai_code_block_boundary}). Cannot extract code."
                )

        # --- Fallback for completely missing delimiters ---
        # If after all attempts, modified_code is still None and explanations is None,
        # and the original response wasn't empty, assume it's a completely malformed response
        # and treat the whole thing as code.
        if modified_code is None and explanations is None and llm_full_response.strip():
            all_delimiters_missing = all(
                delim not in llm_full_response
                for delim in [
                    MODIFIED_CODE_DELIMITER_START,
                    MODIFIED_CODE_DELIMITER_END,
                    EXPLANATIONS_DELIMITER_START,
                    EXPLANATIONS_DELIMITER_END,
                ]
            )
            if all_delimiters_missing:
                logging.warning("PARSER: No delimiters found anywhere. Treating entire response as modified code.")
                modified_code = llm_full_response.strip()
            else:
                # This case means some delimiters were found, but the structure didn't fit any parsing logic.
                # modified_code and explanations remain None.
                logging.warning("PARSER: Some delimiters found, but structure is unexpected. Cannot reliably parse.")

    except Exception as e:
        logging.exception(f"PARSER: Error during LLM response parsing: {e}")
        # Ensure None is returned on exception
        return None, None

    logging.debug(f"PARSER: Final modified_code (first 100): {repr(modified_code[:100]) if modified_code else 'None'}")
    logging.debug(f"PARSER: Final explanations (first 100): {repr(explanations[:100]) if explanations else 'None'}")
    return modified_code, explanations
def get_ai_suggestion(api_key: str, model_name: str, prompt: str) -> tuple[str | None, str | None]:
    """Call the Gemini API and get the modified code and explanations.

    Args:
        api_key: Google AI Studio API key used to configure the client.
        model_name: Gemini model identifier (e.g. 'gemini-2.0-flash-lite').
        prompt: Full prompt built by construct_llm_prompt().

    Returns:
        The (modified_code, explanations) tuple from parse_llm_response(),
        or (None, None) on an empty response, a blocked prompt, or any
        API/unexpected error.
    """
    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(model_name)
        logging.info(f"Sending request to Gemini model '{model_name}'...")

        # Single candidate at low temperature: we want a deterministic-ish
        # restyle, not creative variation.
        generation_config = genai.types.GenerationConfig(candidate_count=1, temperature=0.1)

        response = model.generate_content(prompt, generation_config=generation_config)

        # An empty candidates list typically means the response was filtered;
        # prompt_feedback (when present) explains why.
        if not response.candidates or not response.candidates[0].content.parts:
            logging.error("Error: Model did not return any content.")
            if response.prompt_feedback:
                logging.info(f"Prompt Feedback: {response.prompt_feedback}")
            return None, None

        full_llm_output = response.text

        # --- DEBUGGING: Print raw LLM response ---
        logging.debug("\n" + "=" * 20 + " RAW LLM RESPONSE " + "=" * 20)
        logging.debug(f"Raw LLM Output Length: {len(full_llm_output)}")
        # logging.debug(full_llm_output)  # Uncomment to see the full raw output if needed
        # For very detailed inspection of potential hidden characters:
        # logging.debug("Representation of RAW LLM response (first 500 chars):")
        # logging.debug(repr(full_llm_output[:500]))
        # logging.debug("Representation of RAW LLM response (last 500 chars):")
        # logging.debug(repr(full_llm_output[-500:]))
        logging.debug("=" * (40 + len(" RAW LLM RESPONSE ")) + "\n")
        # --- END DEBUGGING ---

        return parse_llm_response(full_llm_output)

    except google.generativeai.types.BlockedPromptException as bpe:
        logging.error(f"Gemini API Error: Prompt was blocked. {bpe}")
        # Potentially log bpe.response.prompt_feedback if available and relevant
        return None, None
    except google.api_core.exceptions.GoogleAPICallError as api_error:
        logging.error(f"Google API Call Error: {api_error}")
        # api_error often has structured details like api_error.code() or api_error.message
        return None, None
    except Exception as e:
        logging.exception(f"Unexpected error calling Gemini API: {e}")
        # Note: The specific Google API exceptions inherit from Exception, so this will catch them too,
        # but the more specific handlers above allow for different logging/handling if needed.
        # If you remove the specific handlers, this one will catch them.
        # NOTE(review): the .response attribute check below presumes an
        # HTTP-style error object (status_code/text) — confirm it matches the
        # SDK's actual exception shape.
        if hasattr(e, "response") and e.response:
            logging.error(f"API Response Status: {e.response.status_code}")
            logging.error(f"API Response Body: {e.response.text}")
        return None, None
290def generate_and_write_diff(
291 original_content: str, modified_content: str, target_file_path: str, explanations: str | None
292) -> bool:
293 """
294 Generates a diff and writes it to a .diff.<filename> file if actual code changes exist.
295 Explanations are included in the diff file if changes were made, or logged if no code changes.
296 """
297 original_filename = os.path.basename(target_file_path)
298 diff_filename = f".diff.{original_filename}"
299 diff_file_path = os.path.join(os.path.dirname(target_file_path), diff_filename)
301 # Primary condition: Only create a diff file if code content has actually changed.
302 # Use strip() to ignore leading/trailing whitespace differences
303 if original_content.strip() == modified_content.strip():
304 logging.info("Code content is identical to the original after stripping whitespace.")
305 if explanations:
306 # Log explanations, but do not create the diff file for the code.
307 logging.info(f"AI provided explanations for no code change:\n{explanations}")
308 else:
309 logging.info("No explanations provided for identical code.")
310 logging.info(f"Diff file '{diff_file_path}' will NOT be created as there are no actual code changes.")
311 return True # Operation considered successful, but no diff file generated for code.
313 # If we reach here, original_content.strip() != modified_content.strip(), so there are changes.
314 logging.info("Code content has changed. Generating diff file.")
316 original_lines = original_content.splitlines(keepends=True)
317 modified_lines = modified_content.splitlines(keepends=True)
319 # Create a unified diff
320 diff_generator = difflib.unified_diff(
321 original_lines,
322 modified_lines,
323 fromfile=f"a/{original_filename}",
324 tofile=f"b/{original_filename}",
325 lineterm="", # Avoids extra newlines if source lines already have them
326 )
328 diff_content_list = list(diff_generator)
330 # Although we checked strip(), difflib might still produce an empty list if changes are only whitespace/newlines
331 # or if there's some other subtle difference it doesn't represent in the diff format.
332 # We should still write the file if we reached this point based on the strip() check,
333 # but maybe add a note if the diff_content_list is empty unexpectedly.
335 try:
336 with open(diff_file_path, "w", encoding="utf-8") as f:
337 f.write(f"# Diff for {original_filename} (AI Suggested Changes)\n")
338 f.write("# Generated by auto_gemini_styleguide.py\n")
339 f.write("-" * 30 + " GIT-STYLE UNIFIED DIFF " + "-" * 30 + "\n")
341 if not diff_content_list:
342 logging.warning(
343 "Difflib generated an empty diff list, but content comparison (strip) indicated a difference. This is unusual."
344 )
345 f.write("--- Difflib reported no changes, but content comparison (strip) differed. ---\n")
346 else:
347 for line in diff_content_list:
348 f.write(line)
350 if explanations:
351 f.write("\n\n" + "-" * 30 + " AI EXPLANATIONS FOR CHANGES " + "-" * 30 + "\n")
352 f.write(explanations + "\n")
353 else:
354 f.write("\n\n" + "-" * 30 + " AI EXPLANATIONS FOR CHANGES " + "-" * 30 + "\n")
355 f.write("No specific explanations were provided by the AI for these changes.\n")
357 logging.info(f"Successfully wrote diff and explanations to '{diff_file_path}'.")
358 return True
359 except Exception as e:
360 logging.exception(f"Error writing to diff file '{diff_file_path}': {e}")
361 return False
def main():
    """Main function to autocorrect a file using Google AI and generate a diff.

    Workflow: parse CLI args -> load API key -> read style guide and target
    file -> (optional) back up the target -> build prompt -> call Gemini ->
    write diff and, unless --no-modify, overwrite the target file.

    Returns:
        int: 0 on success, 1 on any failure (missing key, unreadable inputs,
        no valid AI code block, or failed write-back).
    """
    # Configure logging at the beginning of main or at module level
    # Set level to DEBUG to see all parser logs and raw LLM response
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s",
    )

    parser = argparse.ArgumentParser(
        description="Autocorrects a file using Google AI according to a style guide and generates a diff with explanations.",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument("target_file_path", help="The path to the Python file to autocorrect.")
    parser.add_argument(
        "--styleguide",
        default=DEFAULT_STYLEGUIDE_PATH,
        help=f"Path to the style guide markdown file (default: {DEFAULT_STYLEGUIDE_PATH}).",
    )
    parser.add_argument(
        "--envfile",
        default=DEFAULT_ENV_FILE_PATH,
        help=f"Path to the .env file for API key (default: {DEFAULT_ENV_FILE_PATH}).",
    )
    parser.add_argument(
        "--model",
        default="gemini-2.0-flash-lite",
        help="The Gemini model to use (e.g., 'gemini-2.0-flash-lite', 'gemini-1.5-flash-latest', 'gemini-pro').",
    )
    parser.add_argument(
        "--backup", action="store_true", help="Create a backup of the original file (as .bak) before overwriting."
    )
    parser.add_argument(
        "--no-modify", action="store_true", help="Do not modify the original file. Only generate the .diff file."
    )
    # Added --debug argument to control logging level from CLI
    parser.add_argument("--debug", action="store_true", help="Enable debug logging for more verbose output.")

    args = parser.parse_args()

    # Set logging level based on --debug argument
    # This overrides the basicConfig level if --debug is not used
    # If --debug is used, basicConfig already set it to DEBUG
    if not args.debug:
        logging.getLogger().setLevel(logging.INFO)

    logging.info("--- AI Code Style Corrector & Differ ---")

    api_key = load_api_key(args.envfile)
    if not api_key:
        return 1

    logging.info(f"Reading style guide from: {args.styleguide}")
    style_guide_content = read_file_content(args.styleguide)
    if style_guide_content is None:
        return 1

    logging.info(f"Reading target file: {args.target_file_path}")
    original_code_content = read_file_content(args.target_file_path)
    if original_code_content is None:
        return 1

    if args.backup:
        backup_file_path = f"{args.target_file_path}.bak"
        logging.info(f"Creating backup: {backup_file_path}")
        if not write_file_content(backup_file_path, original_code_content):
            # Backup failure is non-fatal by design: warn and continue.
            logging.warning("Warning: Failed to create backup. Proceeding cautiously.")

    prompt = construct_llm_prompt(style_guide_content, original_code_content, pathlib.Path(args.target_file_path).name)

    modified_code, explanations = get_ai_suggestion(api_key, args.model, prompt)

    if modified_code is not None:
        logging.info("--- AI Suggestion Received ---")

        # Decide if we should process changes (generate diff, potentially modify file)
        # Process if code changed OR if explanations were provided (even if code didn't change)
        should_process_changes = modified_code.strip() != original_code_content.strip() or bool(explanations)

        if should_process_changes:
            # generate_and_write_diff will internally decide if a diff FILE is created
            # based on whether the code content actually changed.
            generate_and_write_diff(original_code_content, modified_code, args.target_file_path, explanations)

            if args.no_modify:
                logging.info(f"Original file '{args.target_file_path}' was NOT modified due to --no-modify flag.")
            elif modified_code.strip() == original_code_content.strip():
                # This case is hit if AI returned identical code but provided explanations.
                # generate_and_write_diff logged the explanations and skipped diff file creation.
                logging.info("AI returned identical code content. No changes made to the original file based on code.")
            else:
                # Code content actually changed, proceed with writing the modified file
                logging.info(f"Attempting to write AI modified code back to '{args.target_file_path}'...")
                logging.info("IMPORTANT: Please review the changes carefully after the script finishes.")
                if write_file_content(args.target_file_path, modified_code):
                    logging.info("Original file successfully updated with AI suggestions.")
                else:
                    logging.error("Failed to write modified code to the original file.")
                    return 1
        else:
            # modified_code is not None, but code is identical AND no explanations were provided.
            # This is the case where AI returned the exact same code and nothing else.
            logging.info("AI returned identical code content and no explanations. No changes made, no diff generated.")

        logging.info("Process completed.")
    else:
        # This means get_ai_suggestion returned (None, explanations) or (None, None)
        logging.error(
            "Failed to get a valid modified code block from the AI. No changes made to the file. No diff generated."
        )
        if explanations:  # If explanations were returned but code was None
            logging.info("AI provided explanations, but no valid code block was parsed:")
            logging.info("-" * 20 + " EXPLANATIONS " + "-" * 20)
            logging.info(explanations)  # Log the explanations
            logging.info("-" * (40 + len(" EXPLANATIONS ")) + "\n")
        return 1

    return 0
if __name__ == "__main__":
    import sys

    # Propagate main()'s return value as the process exit status.
    sys.exit(main())