With the new Grok 4, which has a huge context window, I thought it would be interesting to try feeding it the Flax API at the start of any discussion about Flax code generation.
In theory this should help it keep the code Flax-only rather than hallucinating a lot of Unity calls, as these LLMs tend to do because of the massive amount of Unity-based data they were trained on.
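So I downloaded the Flax API documentation (the FlaxDocsHost repository, whose `api` folder holds the HTML pages that the script below reads):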
Then created the following script that will generate flax_api_reference.txt, which can be dragged into the LLM chat.
The file gets quite large if all APIs are included, but you can just delete any HTML files that are not relevant.
I haven't tested it much yet, but I did have a lot of issues with Unity code being inserted into my Flax code earlier, so hopefully this can be useful to someone.
import os
from bs4 import BeautifulSoup
import re
# Path to the downloaded api directory. Adjust the default below if needed, or
# set the FLAX_API_DIR environment variable to point at another copy without
# editing this script.
api_dir = os.environ.get(
    "FLAX_API_DIR",
    r"C:\Users\sinsro\Desktop\temp\flax\FlaxDocsHost-master\api",
)
# Name of the generated text file (relative paths resolve against the current
# working directory). Can be overridden with FLAX_API_OUTPUT.
output_file = os.environ.get("FLAX_API_OUTPUT", "flax_api_reference.txt")
# id prefixes of the <h3> headings whose following siblings are scanned for
# member entries (<h4>) and tables.
_SECTION_ID_PREFIXES = (
    'fields',
    'properties',
    'methods',
    'extensionmethods',
    'interfaces',
)


def _is_member_section_id(value):
    """Return True when an ``id`` attribute value starts with a known section prefix."""
    return value is not None and value.startswith(_SECTION_ID_PREFIXES)


def _header_value(content, label, value_tag):
    """Return the text of the first ``value_tag`` element after the <h6> matching ``label``.

    Returns None when no such <h6> exists, and '' when the heading exists but
    no ``value_tag`` element follows it.
    """
    header = content.find('h6', string=re.compile(label))
    if header is None:
        return None
    value = header.find_next(value_tag)
    return value.get_text(strip=True) if value is not None else ''


def _member_lines(heading):
    """Return the output pieces for one member introduced by an <h4> heading.

    The pieces are the member name followed, when present in the member's
    ``div.collapse.in`` container, by its summary, its declaration and the
    type taken from the field/property value table.
    """
    lines = [heading.get_text(strip=True)]
    container = heading.find_next_sibling('div', class_='collapse in')
    if container is None:
        return lines

    summary = container.find('div', class_='markdown level1 summary')
    if summary is not None:
        description = summary.get_text(strip=True)
        if description:
            lines.append(description)

    decl_header = container.find('h5', string=re.compile(r'Declaration'))
    if decl_header is not None:
        lines.append("Declaration")
        decl_pre = decl_header.find_next('pre')
        if decl_pre is not None:
            lines.append(decl_pre.get_text(strip=True))

    value_header = container.find('h5', class_=['fieldValue', 'propertyValue'])
    if value_header is not None:
        lines.append("Type")
        value_table = value_header.find_next('table')
        if value_table is not None:
            value_td = value_table.find('td')
            lines.append(value_td.get_text(strip=True) if value_td is not None else '')
    return lines


def _table_lines(title, table):
    """Return a ``title:`` line followed by one ``a | b | c`` line per non-empty table row."""
    lines = [f"{title}:"]
    for row in table.find_all('tr'):
        cells = [cell.get_text(strip=True) for cell in row.find_all(['th', 'td'])]
        if cells:
            lines.append(" " + " | ".join(cells))
    return lines


def _dedupe_lines(text):
    """Split ``text`` on newlines, dropping blank lines and repeated lines.

    Lines are compared after stripping surrounding whitespace, but the first
    occurrence is kept unstripped.
    """
    seen = set()
    kept = []
    for line in text.split('\n'):
        key = line.strip()
        if key and key not in seen:
            seen.add(key)
            kept.append(line)
    return kept


def extract_content(file_path):
    """Convert one API HTML page into a compact plain-text summary.

    The page is parsed with BeautifulSoup and only the ``<article id="_content">``
    element is inspected. In order, the output contains: the <h1> title, the
    level-0 summary, the inheritance chain, namespace, assembly, the syntax
    block, every member section (fields, properties, methods, extension
    methods, interfaces) and finally the extension-method links.

    NOTE: the final clean-up removes every blank line and every line that
    already appeared earlier in the same page. That collapses the extension
    methods section (which is visited twice), but it also means repeated
    labels such as "Declaration" and "Type", or a repeated type name, are only
    emitted the first time they occur.

    Args:
        file_path: Path to a UTF-8 encoded HTML file.

    Returns:
        The extracted text terminated by two newlines, or the single line
        ``No extractable content in <filename>`` when the page has no
        ``<article id="_content">`` element.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    filename = os.path.basename(file_path)
    with open(file_path, 'r', encoding='utf-8') as f:
        html = f.read()

    soup = BeautifulSoup(html, 'html.parser')
    content = soup.find('article', {'id': '_content'})
    if content is None:
        return f"No extractable content in {filename}\n"

    # Each entry becomes one or more output lines; entries are joined with
    # newlines once at the end instead of growing a string piece by piece.
    pieces = []

    # H1: Class/Struct/Enum name
    h1 = content.find('h1')
    if h1 is not None:
        pieces.append(h1.get_text(strip=True))

    # Summary/Description
    summary = content.find('div', class_='markdown level0 summary')
    if summary is not None:
        pieces.append(' '.join(p.get_text(strip=True) for p in summary.find_all('p')))

    # Inheritance
    inheritance_header = content.find('h5', string=re.compile(r'Inheritance'))
    if inheritance_header is not None:
        inheritance_div = inheritance_header.find_next('div', class_='inheritance')
        if inheritance_div is not None:
            span_texts = (span.get_text(strip=True) for span in inheritance_div.find_all('span'))
            chain = ' > '.join(part for part in span_texts if part)
            pieces.append(f"Inheritance {chain}")

    # Namespace
    namespace = _header_value(content, r'Namespace', 'a')
    if namespace is not None:
        pieces.append(f"Namespace {namespace}")

    # Assembly
    assembly = _header_value(content, r'Assembly', 'code')
    if assembly is not None:
        pieces.append(f"Assembly {assembly}")

    # Syntax
    syntax_header = content.find('h5', string=re.compile(r'Syntax'))
    if syntax_header is not None:
        pieces.append("Syntax")
        syntax_pre = syntax_header.find_next('pre')
        if syntax_pre is not None:
            pieces.append(syntax_pre.get_text(strip=True))

    # Sections like Fields, Properties, etc.
    for section in content.find_all('h3', id=_is_member_section_id):
        section_title = section.get_text(strip=True)
        pieces.append(section_title)
        # Walk the siblings up to the next <h3>. Text nodes between tags have
        # no tag name and are skipped; the explicit None test makes sure only
        # the end of the sibling list (not an empty text node) stops the walk.
        node = section.next_sibling
        while node is not None and node.name != 'h3':
            if node.name == 'h4':  # Member name
                pieces.extend(_member_lines(node))
            elif node.name == 'table':  # For namespaces or classes lists
                pieces.extend(_table_lines(section_title, node))
            node = node.next_sibling

    # Extension Methods (special handling if not caught above)
    ext_section = content.find('h3', id='extensionmethods')
    if ext_section is not None:
        pieces.append("Extension Methods")
        for div in ext_section.find_next_siblings('div', class_=None):
            ext_link = div.find('a', class_='xref')
            if ext_link is None:
                continue
            pieces.append(ext_link.get_text(strip=True))
            ext_summary = div.find('div', class_='markdown level1 summary')
            if ext_summary is not None:
                ext_desc = ext_summary.get_text(strip=True)
                if ext_desc:
                    pieces.append(ext_desc)

    # Clean duplicates (and blank lines) across the whole page
    clean_lines = _dedupe_lines('\n'.join(pieces))
    return '\n'.join(clean_lines) + '\n\n'
def generate_api_reference():
    """Write the extracted text of every API HTML page into ``output_file``.

    All ``.html`` files below ``api_dir`` are collected recursively, except
    ``index.html`` and ``toc.html``. They are processed in alphabetical order
    of their file name, with the full path as a tie-breaker so that pages
    sharing a name in different folders always come out in the same order.

    Raises:
        FileNotFoundError: If ``api_dir`` is not an existing directory. This is
            checked before the output file is opened, so a wrong path neither
            truncates a previous result nor silently produces an empty file.
    """
    if not os.path.isdir(api_dir):
        raise FileNotFoundError(f"API directory not found: {api_dir}")

    # Get all html files, skip index.html and toc.html
    skipped = ('index.html', 'toc.html')
    all_files = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(api_dir)
        for name in names
        if name.endswith('.html') and name not in skipped
    ]
    all_files.sort(key=lambda path: (os.path.basename(path), path))

    with open(output_file, 'w', encoding='utf-8') as f:
        for file_path in all_files:
            f.write(extract_content(file_path))
# Run the generation only when executed as a script, so that importing this
# module (e.g. to reuse extract_content) does not start writing files.
if __name__ == "__main__":
    generate_api_reference()
    print(f"API reference saved to {output_file}")