开始文章之前可以先介绍下何为code interpreter。所谓code interpreter,从实际操作讲就是让llm模型具备了立马执行代码、并把执行结果作为下轮模型生成的物料的能力。这里面有两个关键词:“立马执行代码”、“结果作为物料”。其实如果llm不具备控制计算机得到执行结果、并把执行结果作为下一轮控制的物料的能力,而只是能够生成静态的代码,那么llm不过还是一个静态的语言生成模型。但如果llm可以把生成的代码执行得到结果,那llm就是一个控制器和仿真器,可以根据需要生成代码,动态地串接、补充需要的物料,以及对可能的选择做预测推断,让llm的能力直接无限制地扩张;这就是code interpreter的价值所在:让llm具备动态自组装、调整可能的组合、对决策做精准和模糊的仿真预测的能力。也就是说llm具备了解决实际问题的能力,不再只能形而上地思考、给出一些指导性的理论文案,而是可以切身地去尝试,从形而上到形而下的具体落地动作完全打通。
这也就是为什么code interpreter只是增加了“让生成的代码可执行”这么一个看起来不太大的变化,却让各位大佬为之欢呼的原因。当然,以现在llm根据语言生成code的能力和code执行通过率(比如环境中该有的包没有)来看,离稳健的商用系统还是有距离的。这些能力的提升是需要llm全面提升,甚至需要构建一个系统来解决的;这一定是需要时间来沉淀和打磨的,当然这也是机会所在。
1.图生成,传入原始图,指令生成抠人像的code,code interpreter处理结果,然后再对图像做image2image生成或者补背景;生成图再做线稿生成
2.文本生成,llm生成的文本中有不合规的多标点符号共现,指令生成正则处理code,code interpreter处理结果,然后再做下一步的文本抽摘要,或者文本改写
2.利用python interpreter方法来实现,起一个服务器接llm生成code,执行完结果返回给llm服务器
3.把llm生成的code存成py文件,llm服务器python os执行code
import contextlib
import io

# Demo: run model-generated source text in-process with exec().
# `prog` stands in for the code an LLM would return (a small pandas /
# matplotlib / seaborn exploratory-analysis script).
prog = '''# 导入需要的库
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# 生成示例数据,假设是关于学生的成绩,年龄,性别等信息
df = pd.DataFrame({
"name": ["Alice", "Bob", "Charlie", "David", "Eve"],
"score": [90, 80, 70, 60, 50],
"age": [18, 19, 20, 21, 22],
"gender": ["F", "M", "M", "M", "F"]
})
# 查看数据的基本信息,如行数,列数,数据类型,缺失值等
df.info()
# 查看数据的统计描述,如均值,标准差,最大值,最小值等
df.describe()
# 选择需要分析的列,假设是score, age, gender
cols = ["score", "age", "gender"]
# 绘制直方图,查看每列的分布情况
df[cols].hist(figsize=(10, 8))
plt.show()
# 绘制箱线图,查看每列的异常值情况
df[cols].boxplot(figsize=(10, 8))
plt.show()
# 绘制散点图矩阵,查看每两列之间的相关性
sns.pairplot(df[cols])
plt.show()'''

# SECURITY: exec() runs the text with the full privileges of this process.
# Only feed it code you trust, or run it in an isolated sandbox instead.
#
# exec() itself always returns None, so binding its return value yields
# nothing usable. Capture what the snippet prints instead, so that `c` holds
# text that can be handed back to the model. Note that bare expressions such
# as `df.describe()` print nothing under exec(), so only explicit output
# (e.g. `df.info()`) is captured.
_captured_stdout = io.StringIO()
with contextlib.redirect_stdout(_captured_stdout):
    exec(prog)
c = _captured_stdout.getvalue()

# Alternative: save the generated code to a .py file and run it as a separate
# process.
import os
import subprocess
import sys

# os.system() only returns the exit status and lets the child's output go
# straight to the terminal, so the result cannot be fed back to the model.
# subprocess.run() with capture_output=True keeps stdout/stderr as text on
# `completed`. Passing an argv list avoids shell parsing, and sys.executable
# runs the file with the same interpreter (and installed packages) as this
# process.
completed = subprocess.run(
    [sys.executable, 'file_name.py'],
    capture_output=True,
    text=True,
    check=False,  # a failing script is reported via returncode / stderr
)
- from jupyter_client import KernelManager
- import re
class JupyterNotebook:
    """Run code strings in a Jupyter kernel and collect their text output.

    A kernel is started when the object is constructed. ``add_and_run``
    submits code to it and gathers what the kernel publishes on the IOPub
    channel; ``clean_output`` flattens that into a single string.
    """

    def __init__(self):
        self.km = KernelManager()
        self.km.start_kernel()
        self.kc = self.km.client()

    def close(self):
        """Stop the client channels and shut the kernel process down."""
        self.kc.stop_channels()
        self.km.shutdown_kernel(now=True)

    def clean_output(self, outputs):
        """Flatten raw kernel outputs into one stripped plain-text string.

        Args:
            outputs: list of items as collected by ``add_and_run``:
                a dict (its ``'text/plain'`` entry is used, if present),
                a str (used as is), or a list of str (traceback lines,
                joined with newlines and stripped of ANSI escape codes).
                Items of any other type are ignored.

        Returns:
            The kept pieces joined with newlines, with surrounding
            whitespace removed.
        """
        outputs_only_str = []
        for item in outputs:
            if isinstance(item, dict):
                if 'text/plain' in item:
                    outputs_only_str.append(item['text/plain'])
            elif isinstance(item, str):
                outputs_only_str.append(item)
            elif isinstance(item, list):
                error_msg = '\n'.join(item)
                # Tracebacks carry terminal colour codes; drop them.
                error_msg = re.sub(r'\x1b\[.*?m', '', error_msg)
                outputs_only_str.append(error_msg)
        return '\n'.join(outputs_only_str).strip()

    def add_and_run(self, code_string):
        """Execute ``code_string`` in the kernel and return its output.

        Args:
            code_string: source code to execute.

        Returns:
            A ``(output_text, error_flag)`` tuple. ``output_text`` is the
            cleaned text of all results, stream output and tracebacks;
            ``error_flag`` is True if the kernel reported an error.

        Note:
            Collection stops when the kernel reports ``idle`` for this
            request, or when no IOPub message arrives within 10 seconds,
            in which case whatever was gathered so far is returned.
        """
        from queue import Empty  # raised by get_iopub_msg on timeout

        msg_id = self.kc.execute(code_string)
        outputs = []
        error_flag = False
        while True:
            try:
                msg = self.kc.get_iopub_msg(timeout=10)
            except Empty:
                # Nothing published within the timeout: stop waiting.
                break

            # IOPub is shared by every request (including kernel start-up
            # status messages); only handle replies to *this* execution,
            # otherwise a stale 'idle' status would end the loop early.
            if msg.get('parent_header', {}).get('msg_id') != msg_id:
                continue

            msg_type = msg['header']['msg_type']
            content = msg['content']
            if msg_type == 'execute_result':
                outputs.append(content['data'])
            elif msg_type == 'stream':
                outputs.append(content['text'])
            elif msg_type == 'error':
                error_flag = True
                outputs.append(content['traceback'])
            # An 'idle' status for our request means the cell has finished.
            if msg_type == 'status' and content['execution_state'] == 'idle':
                break
        return self.clean_output(outputs), error_flag

class BaseCodeInterpreter:
    """Shared plumbing for code interpreters.

    Holds the chat dialog (seeded with the system prompt) and a
    ``JupyterNotebook`` used to execute code extracted from model replies.
    """

    def __init__(self):
        self.dialog = [
            {"role": "system", "content": CODE_INTERPRETER_SYSTEM_PROMPT,},
            #{"role": "user", "content": "How can I use BeautifulSoup to scrape a website and extract all the URLs on a page?"},
            #{"role": "assistant", "content": "I think I need to use beatifulsoup to find current korean president,"}
        ]
        self.nb = JupyterNotebook()

    @staticmethod
    def extract_code_blocks(text: str) -> "list[str]":
        """Return the contents of every triple-backtick fenced block in ``text``.

        An opening fence may be followed by ``python`` and a newline, which
        is not included in the result. Each block is stripped of surrounding
        whitespace. Returns an empty list when there is no complete fence.
        """
        pattern = r'```(?:python\n)?(.*?)```'  # Match optional 'python\n' but don't capture it
        code_blocks = re.findall(pattern, text, re.DOTALL)
        return [block.strip() for block in code_blocks]

    @staticmethod
    def parse_last_answer(text: str) -> str:
        """Return the part of ``text`` after the last ``E_INST`` marker.

        If the marker does not occur, ``text`` is returned unchanged.
        """
        return text.split(E_INST)[-1]

    def execute_code_and_return_output(self, code_str: str) -> "tuple[str, bool]":
        """Run ``code_str`` in the Jupyter kernel.

        Returns:
            The ``(output_text, error_flag)`` pair produced by
            ``JupyterNotebook.add_and_run``.
        """
        outputs, error_flag = self.nb.add_and_run(code_str)
        return outputs, error_flag

把llm模型的代码生成能力封装进去,code interpreter的能力就具备了;下面代码中的model_path换成chatglm、codegeex2-6b都行。
class LlamaCodeInterpreter(BaseCodeInterpreter):
    """Code interpreter backed by a Hugging Face chat model and a Jupyter kernel.

    ``chat`` asks the model for a reply, executes the first fenced code block
    of that reply in the kernel, appends code and result to the dialog, and
    repeats until the model answers without code, emits the EOS token, or
    the attempt limit is reached.
    """

    def __init__(self, model_path: str, load_in_8bit: bool = False, load_in_4bit: bool = False):
        """Load tokenizer and model from ``model_path`` and start a kernel.

        Args:
            model_path: path or hub id passed to ``from_pretrained``.
            load_in_8bit: currently unused; only the disabled
                ``LlamaForCausalLM`` loading line below referenced it.
            load_in_4bit: currently unused, same as ``load_in_8bit``.
        """
        #self.model = LlamaForCausalLM.from_pretrained(model_path, device_map="auto", load_in_4bit = load_in_4bit,load_in_8bit=load_in_8bit, torch_dtype=torch.float16,use_safetensors=True)
        #self.tokenizer = LlamaTokenizer.from_pretrained(model_path)
        self.tokenizer = AutoTokenizer.from_pretrained(model_path,trust_remote_code=True)
        self.model = AutoModel.from_pretrained(model_path,trust_remote_code=True).cuda()

        # Disabled: special-token registration used with the Llama loading path.
        # # Add special token
        # special_tokens_dict = dict()
        # if self.tokenizer.pad_token is None:
        #     special_tokens_dict["pad_token"] = DEFAULT_PAD_TOKEN
        # if self.tokenizer.eos_token is None:
        #     special_tokens_dict["eos_token"] = DEFAULT_EOS_TOKEN
        # if self.tokenizer.bos_token is None:
        #     special_tokens_dict["bos_token"] = DEFAULT_BOS_TOKEN
        # if self.tokenizer.unk_token is None:
        #     special_tokens_dict["unk_token"] = DEFAULT_UNK_TOKEN
        # smart_tokenizer_and_embedding_resize(
        #     special_tokens_dict=special_tokens_dict,
        #     tokenizer=self.tokenizer,
        #     model=self.model,
        # )

        # NOTE: the base-class __init__ is intentionally not called here; the
        # dialog and notebook are set up directly with a different prompt.
        self.dialog = [
            {"role": "system", "content": CODE_INTERPRETER_SYSTEM_PROMPT + "\nUse code to answer",},
            #{"role": "user", "content": "How can I use BeautifulSoup to scrape a website and extract all the URLs on a page?"},
            #{"role": "assistant", "content": "I think I need to use beatifulsoup to find current korean president,"}
        ]
        self.nb = JupyterNotebook()

    def dialog_to_prompt(self, dialog: List[Dialog], SYS_PROMPT: str = '') -> torch.Tensor:
        """Encode a dialog into a ``(1, n_tokens)`` tensor of token ids.

        code borrowed from : https://github.com/facebookresearch/llama/blob/main/llama/generation.py

        The system message (``SYS_PROMPT`` is inserted if the dialog has
        none) is folded into the first following message between ``B_SYS``
        and ``E_SYS``. Each (user, assistant) pair is then encoded as
        ``B_INST user E_INST assistant``, and the final message of the dialog
        is encoded once more as a trailing ``B_INST ... E_INST`` turn.

        Args:
            dialog: list of ``{"role": ..., "content": ...}`` messages.
            SYS_PROMPT: system prompt to use when ``dialog`` lacks one.

        Returns:
            Tensor of token ids with a leading batch dimension of 1.
        """
        if dialog[0]["role"] != "system":
            dialog = [
                {
                    "role": "system",
                    "content": SYS_PROMPT,
                }
            ] + dialog
        # Merge the system prompt into the message that follows it.
        dialog = [
            {
                "role": dialog[1]["role"],
                "content": B_SYS + dialog[0]["content"] + E_SYS + dialog[1]["content"],
            }
        ] + dialog[2:]
        assert all([msg["role"] == "user" for msg in dialog[::2]]) and all(
            [msg["role"] == "assistant" for msg in dialog[1::2]]
        ), (
            "model only supports 'system', 'user' and 'assistant' roles, "
            "starting with 'system', then 'user' and alternating (u/a/u/a/u...)"
        )

        # Extend in place rather than sum(list_of_lists, []), which re-copies
        # the accumulated list for every pair.
        dialog_tokens: List[int] = []
        for prompt, answer in zip(dialog[::2], dialog[1::2]):
            dialog_tokens += self.tokenizer.encode(
                f"{B_INST} {(prompt['content']).strip()} {E_INST} {(answer['content']).strip()} ",
            )

        #assert (
        #    dialog[-1]["role"] == "user"
        #), f"Last message must be from user, got {dialog[-1]['role']}"
        dialog_tokens += self.tokenizer.encode(
            f"{B_INST} {(dialog[-1]['content']).strip()} {E_INST}",
        )
        return torch.tensor(dialog_tokens).unsqueeze(0)

    def hard_coded_eos_splitter(self):
        """Truncate the last dialog message at the first ``DEFAULT_EOS_TOKEN``."""
        self.dialog[-1]['content'] = self.dialog[-1]['content'].split(DEFAULT_EOS_TOKEN)[0]

    def chat(self, user_message: str, VERBOSE: bool = False):
        """Answer ``user_message``, executing generated code along the way.

        Appends the user message to the dialog, then loops for at most four
        generations. When a reply contains a fenced code block, the first
        block is run in the kernel and the text before it, the code and a
        ``RESULTS`` block are appended to the assistant message. The loop
        ends when a reply has no code block, when the assistant message
        contains the EOS token, or when the attempt limit is hit. Progress
        is printed to stdout in colour.

        Args:
            user_message: the user's request.
            VERBOSE: accepted for interface compatibility; currently unused.

        Returns:
            The last dialog entry (a ``{"role", "content"}`` dict).
        """
        self.dialog.append({"role": "user", "content": user_message})
        attempt = 0

        print('###User : ' + Fore.BLUE + Style.BRIGHT + user_message + Style.RESET_ALL)
        print('\n###Assistant : ')

        while attempt <= 3:
            dialog_tokens = self.dialog_to_prompt(dialog=self.dialog)
            gen_tokens = self.model.generate(dialog_tokens.cuda(),
                                             max_new_tokens=4096,
                                             top_p=0.8,
                                             temperature=0.95,
                                             do_sample=True,
                                             use_cache=True)
            # Decode only the newly generated tokens (everything after the prompt).
            generated_text = self.tokenizer.batch_decode(gen_tokens[:, dialog_tokens.shape[1]:])[0]
            generated_code_blocks = self.extract_code_blocks(generated_text)

            if not generated_code_blocks:
                # No code in this reply: record it as the answer and stop.
                if self.dialog[-1]['role'] == 'user':
                    self.dialog.append({"role": "assistant", "content": generated_text})
                else:
                    self.dialog[-1]['content'] += generated_text
                print(Fore.GREEN + generated_text + Style.RESET_ALL)
                break

            first_code_block = generated_code_blocks[0]
            # Everything up to the code body (this includes the opening fence).
            first_code_block_pos = generated_text.find(first_code_block)
            if first_code_block_pos == -1:
                text_before_first_code_block = generated_text
            else:
                text_before_first_code_block = generated_text[:first_code_block_pos]

            print(Fore.GREEN + text_before_first_code_block + Style.RESET_ALL)
            print(Fore.YELLOW + first_code_block + '\n```\n' + Style.RESET_ALL)

            code_block_output, _error_flag = self.execute_code_and_return_output(first_code_block)
            code_block_output = f'{code_block_output}'.strip()
            code_block_output_str = f'\n```RESULTS\n{code_block_output}\n```\n'
            print(Fore.LIGHTBLACK_EX + code_block_output_str + Style.RESET_ALL)
            #markdown = Markdown(code_block_output_str)print(markdown)

            gen_final = f'{text_before_first_code_block}{first_code_block}\n```{code_block_output_str}'
            if self.dialog[-1]['role'] == 'user':
                self.dialog.append({"role": "assistant", "content": gen_final})
            elif self.dialog[-1]['role'] == 'assistant':
                self.dialog[-1]['content'] += gen_final

            # early stop
            if DEFAULT_EOS_TOKEN in self.dialog[-1]['content']:
                self.hard_coded_eos_splitter()
                return self.dialog[-1]
            self.hard_coded_eos_splitter()
            attempt += 1
            #print(f"====Attempt[{attempt}]====\n{self.dialog[-1]['content']}")
        #print(self.dialog)
        return self.dialog[-1]

1.文章从技术趋势的角度介绍了code interpreter的价值和有意义的方向何在
2.介绍了code interpreter实现的核心问题,就是如何让llm生成的code可以调起编译器执行
