# Install required packages
!pip install -q -U transformers tqdm jsonschema pyyaml termcolor dotenv tenacity


from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


import yaml
import json
import os
import argparse
from termcolor import colored
import subprocess
import time
from tenacity import retry, wait_random_exponential, stop_after_attempt
from transformers import AutoTokenizer
from tqdm import tqdm
import concurrent.futures
import jsonschema
from jsonschema import validate


## Authenticate to Hugging Face to pull and push models
# !pip install huggingface_hub -q
# from huggingface_hub import notebook_login

# notebook_login()


# Extraction model and the server that hosts it.
# Here I'm using a self-hosted model, but you could swap this for gpt-3.5-turbo, etc.
# I've made available a one-click deployment template for Runpod here:
# https://runpod.io/console/gpu-cloud?template=t6sgcn049x&ref=n2u8jwou
model = "openchat/openchat_3.5" # model id used for extraction
api_endpoint = "https://xd3lef1do5g8d0-8080.proxy.runpod.net" # where it's being served


# Towards the end of this notebook, we'll instantiate this class and use it to
# perform extraction on a text file
class Config:
    """Settings for a single extraction run.

    Every constructor argument is stored unchanged on the instance under the
    same name:

    chunk_length     -- size of each slice of the input text
    output_format    -- "json" or "yaml"
    output_file_name -- name of the file the aggregated result is written to
    batching         -- whether the chunks are requested in parallel
    input_file_name  -- path of the text file to extract from
    """

    def __init__(self, chunk_length=8000, output_format="json", output_file_name="output",
                 batching=True, input_file_name="input.txt"):
        # Input side: what is read and how it is split and dispatched.
        self.input_file_name = input_file_name
        self.chunk_length = chunk_length
        self.batching = batching

        # Output side: serialization format and destination.
        self.output_format = output_format
        self.output_file_name = output_file_name


# utils.py
from termcolor import colored
import os


def pretty_print_conversation(messages):
    """Print each chat message in a colour chosen by its role.

    Recognised roles are "system", "user", "assistant" and "tool"; messages
    with any other role are skipped silently. An assistant message that has a
    truthy "function_call" shows that call instead of its content. Each user
    message is additionally written to ``user_request.txt``, overwriting the
    file, so after the call it holds the last user message printed.
    """
    palette = {
        "system": "red",
        "user": "green",
        "assistant": "blue",
        "tool": "magenta",
    }

    for msg in messages:
        role = msg["role"]

        # Work out the text to show for this role.
        if role == "system" or role == "user":
            line = f"{role}: {msg['content']}\n"
        elif role == "assistant":
            shown = msg["function_call"] if msg.get("function_call") else msg["content"]
            line = f"assistant: {shown}\n"
        elif role == "tool":
            line = f"function ({msg['name']}): {msg['content']}\n"
        else:
            # Unknown role: nothing is printed.
            continue

        print(colored(line, palette[role]))

        if role == "user":
            # Keep a copy of the request on disk.
            with open("user_request.txt", "w") as file:
                file.write(msg["content"] + "\n")


def read_text_file(text_file):
    """Return the full contents of ``text_file`` as a single string.

    The file is opened in text mode with the platform's default encoding.
    """
    with open(text_file, "r") as handle:
        return handle.read()


def check_output_file_format(output_file_name, output_format):
    """Return ``output_file_name``, adding ``.<output_format>`` if it has no extension.

    A name that already carries an extension is returned unchanged, even when
    that extension differs from ``output_format``.
    """
    has_extension = bool(os.path.splitext(output_file_name)[1])
    if has_extension:
        return output_file_name
    return f"{output_file_name}.{output_format}"


# prompts.py
import json
import yaml

def read_schema(file_path):
    """Load a schema from a ``.json``, ``.yaml`` or ``.yml`` file.

    Returns the parsed content. Any failure -- missing file, parse error, or
    an unsupported extension -- is re-raised as ``FileNotFoundError`` whose
    message starts with "Error reading file: ".
    """
    try:
        # Pick the parser from the file extension, then read once.
        if file_path.endswith(".json"):
            parse = json.load
        elif file_path.endswith((".yaml", ".yml")):
            parse = yaml.safe_load
        else:
            raise ValueError("Unsupported file format. Please use '.json' or '.yaml/.yml'.")

        with open(file_path, "r") as f:
            return parse(f)
    except Exception as e:
        raise FileNotFoundError(f"Error reading file: {e}")

# Placeholder values for scalar JSON Schema types.
_SCALAR_EXAMPLES = {
    "string": "sample_string",
    "integer": 1,
    "number": 1.0,
    "boolean": True,
}


def _primary_type(spec):
    """Return the declared type of a schema node: first entry of a type list, "string" if absent."""
    declared = spec.get("type", "string")
    if isinstance(declared, list):
        declared = declared[0]
    return declared


def generate_example(schema):
    """Generates an example object based on the provided schema.

    Args:
        schema: A JSON-Schema-style dict with a "properties" mapping.

    Returns:
        A dict with one placeholder value per property. Strings, integers,
        numbers and booleans get a scalar placeholder; arrays are delegated to
        ``generate_array_example``; objects that declare their own
        "properties" are expanded recursively. Any other type yields
        "sample_value".

    Raises:
        KeyError: If ``schema`` has no "properties" key.
    """
    example = {}
    for key, value in schema["properties"].items():
        data_type = _primary_type(value)
        if data_type == "array":
            # Only build the array example for properties that are arrays.
            example[key] = generate_array_example(value)
        elif data_type == "object" and "properties" in value:
            # Nested object: produce a nested example so it matches the schema.
            example[key] = generate_example(value)
        else:
            example[key] = _SCALAR_EXAMPLES.get(data_type, "sample_value")

    return example


def generate_array_example(value):
    """Generates an example array based on the array type in schema.

    Args:
        value: The property schema of an array; its "items" entry (default
            ``{}``, i.e. strings) decides the element type.

    Returns:
        A two-element list for string, integer, number and boolean items, a
        one-element list holding a nested example for object items that
        declare "properties", and ``["sample_value"]`` for anything else.
    """
    item_schema = value.get("items", {})
    item_type = _primary_type(item_schema)

    if item_type == "object" and "properties" in item_schema:
        return [generate_example(item_schema)]

    return {
        "string": [f"sample_string_{i+1}" for i in range(2)],
        "integer": [i + 1 for i in range(2)],
        "number": [float(i + 1) for i in range(2)],
        "boolean": [True, False],
    }.get(item_type, ["sample_value"])

def create_extract_prompt(schema, data_format):
    """Build the extraction instruction text for a schema in JSON or YAML.

    The prompt embeds the schema and an auto-generated example response, both
    rendered in the requested format. ``data_format`` is matched
    case-insensitively but inserted into the prompt text exactly as given.

    Raises:
        ValueError: If ``data_format`` is neither JSON nor YAML.
    """
    example = generate_example(schema)
    fmt = data_format.lower()

    # Choose one renderer and apply it to both the schema and the example.
    if fmt == "json":
        def render(obj):
            return json.dumps(obj, indent=4)
    elif fmt == "yaml":
        def render(obj):
            return yaml.dump(obj, default_flow_style=False, indent=4)
    else:
        raise ValueError("Unsupported data format. Please use 'JSON' or 'YAML'.")

    schema_str = render(schema)
    example_str = render(example)

    parts = [
        f"Extract names and organizations from the provided text, and return them in {data_format} format. ",
        f"Use the following schema:\n\n{schema_str}\n\n",
        f"Here's an example of a response in {data_format} format:\n\n{example_str}\n\n",
        "Do not include anything that is not explicitly mentioned in the text. ",
        "Analyse the text carefully to ensure all requested data is extracted. ",
        "Include each name and organization only once. ",
        "Adhere strictly to the response format without adding extra spaces or text.",
    ]
    return "".join(parts)

{
    "type": "object",
    "properties": {
        "names": {
            "type": "array",
            "items": {
                "type": "string"
            }
        },
        "organisations": {
            "type": "array",
            "items": {
                "type": "string"
            }
        }
    },
    "required": [
        "names",
        "organisations"
    ]
}


# json_validation_aggregation.py
import json
import jsonschema
from jsonschema import validate
from typing import Dict, Any

class JsonAggregator:
    """Collects schema-validated JSON documents into one de-duplicated result.

    Attributes:
        schema: The JSON schema loaded from ``schema_file``.
        aggregated_data: One set per schema property, holding every distinct
            list element seen for that property.
        success: Number of documents that passed validation.
        fail: Number of documents that failed validation.
    """

    def __init__(self, schema_file: str):
        self.schema = self.load_schema(schema_file)
        self.aggregated_data = {key: set() for key in self.schema["properties"]}
        self.success = 0
        self.fail = 0

    def load_schema(self, schema_file: str) -> Dict[str, Any]:
        """Read and return the JSON schema stored at ``schema_file``."""
        with open(schema_file, "r") as file:
            return json.load(file)

    def validate_json(self, data: Dict[str, Any]) -> bool:
        """Return True when ``data`` conforms to the schema, False otherwise."""
        try:
            validate(instance=data, schema=self.schema)
            return True
        except jsonschema.exceptions.ValidationError:
            return False

    def aggregate_json(self, json_data: Dict[str, Any]):
        """Validate ``json_data`` and merge its list values into the aggregate.

        Invalid documents only increment ``fail``. For valid documents,
        list-valued keys declared in the schema are merged; other keys and
        non-list values are ignored.
        """
        if not self.validate_json(json_data):
            self.fail += 1
            return

        self.success += 1
        for key, values in json_data.items():
            # A document can validate and still carry keys the schema does
            # not declare; skip those instead of raising KeyError.
            if key in self.aggregated_data and isinstance(values, list):
                self.aggregated_data[key].update(values)

    def write_aggregated_data(self, output_file: str):
        """Write the aggregated values to ``output_file`` as indented JSON."""
        final_data = {}
        for key, values in self.aggregated_data.items():
            try:
                # Sorted for reproducible output across runs.
                final_data[key] = sorted(values)
            except TypeError:
                # Values of mixed, non-comparable types: keep set order.
                final_data[key] = list(values)

        with open(output_file, "w") as file:
            json.dump(final_data, file, indent=4)
        print(f"Aggregation complete! The aggregated data has been written to '{output_file}'.")

# Example usage
# aggregator = JsonAggregator("your_schema_file.json")
# aggregator.aggregate_json(your_json_data)
# aggregator.write_aggregated_data("output_file.json")

type: object
properties:
  names:
    type: array
    items:
      type: string
  organisations:
    type: array
    items:
      type: string
required: [names, organisations]


# yaml_validation_aggregation
import yaml
from jsonschema import validate
from typing import Dict, Any
import jsonschema

class YamlAggregator:
    """
    A class used to aggregate YAML data based on a provided schema.

    Attributes
    ----------
    schema : Dict[str, Any]
        a dictionary representing the YAML schema
    aggregated_data : Dict[str, Any]
        one entry per schema property: a de-duplicated, sorted list for
        properties of type "array", otherwise the most recent value seen
        (None until one arrives)
    success : int
        number of documents that passed validation
    fail : int
        number of documents that failed validation

    Methods
    -------
    load_schema(schema_file: str)
        Loads the YAML schema from a file.
    validate_yaml(data: Dict[str, Any])
        Validates the YAML data against the schema.
    aggregate_yaml(yaml_data: Dict[str, Any])
        Aggregates the YAML data.
    write_aggregated_data(output_file: str)
        Writes the aggregated data to a file.
    """

    def __init__(self, schema_file: str):
        self.schema = self.load_schema(schema_file)
        self.aggregated_data = {
            key: [] if self._is_array(key) else None
            for key in self.schema["properties"]
        }
        self.success = 0
        self.fail = 0

    def _is_array(self, key: str) -> bool:
        """Return True when the schema declares property ``key`` with type "array".

        Uses ``.get`` because a property schema need not carry a "type"
        keyword; such properties are treated as non-array.
        """
        return self.schema["properties"][key].get("type") == "array"

    def load_schema(self, schema_file: str) -> Dict[str, Any]:
        """Loads the YAML schema from a file."""
        with open(schema_file, "r") as file:
            return yaml.safe_load(file)

    def validate_yaml(self, data: Dict[str, Any]) -> bool:
        """Validates the YAML data against the schema, printing the outcome."""
        try:
            validate(instance=data, schema=self.schema)
            print("YAML validation successful!")
            return True
        except jsonschema.exceptions.ValidationError as ve:
            print(f"Invalid yaml error - {ve}")
            return False

    def aggregate_yaml(self, yaml_data: Dict[str, Any]):
        """Validates ``yaml_data`` and merges it into the aggregate.

        Invalid documents only increment ``fail``. For valid documents, keys
        that are not schema properties are ignored, array properties are
        merged into a sorted list without duplicates, and any other property
        is overwritten with the new value.
        """
        if not self.validate_yaml(yaml_data):
            self.fail += 1
            return

        self.success += 1
        for key, value in yaml_data.items():
            if key not in self.aggregated_data:
                continue
            if self._is_array(key):
                # Merge, de-duplicate and sort.
                merged = set(self.aggregated_data[key])
                merged.update(value)
                self.aggregated_data[key] = sorted(merged)
            else:
                # Scalar property: last value wins.
                self.aggregated_data[key] = value

    def write_aggregated_data(self, output_file: str):
        """Writes the aggregated data to a file."""
        with open(output_file, "w") as file:
            yaml.dump(self.aggregated_data, file)
        print(
            f"Aggregation complete! The aggregated data has been written to '{output_file}'."
        )


import os
import subprocess
import json
import time
from tenacity import retry, wait_random_exponential, stop_after_attempt
from transformers import AutoTokenizer

# model = "openchat/openchat_3.5" # for extraction
# api_endpoint = "https://xd3lef1do5g8d0-8080.proxy.runpod.net" # model endpoint

# URL that generation requests are POSTed to: the "/generate" route on the
# model server (presumably a Text Generation Inference server, going by the
# variable name -- confirm against the deployment).
tgi_api_base = api_endpoint + "/generate"

# The tokenizer is loaded for the same model id so prompts can be rendered
# with that model's chat template.
# NOTE(review): trust_remote_code=True lets code shipped in the model repo run
# locally -- only keep it for repositories you trust.
tokenizer = AutoTokenizer.from_pretrained(model, trust_remote_code=True)

# # Manual chat template
# tokenizer.chat_template = '''{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{%- set ns = namespace(found=false) -%}{%- for message in messages -%}{%- if message['role'] == 'system' -%}{%- set ns.found = true -%}{%- endif -%}{%- endfor -%}{{bos_token}}{%- if not ns.found -%}{# Suppressed System Message #}{%- endif %}{%- for message in messages %}{%- if message['role'] != 'system' %}{%- if message['role'] == 'user' %}{{'### Instruction:\\n' + message['content'] + '\\n'}}{%- else %}{{'### Response:\\n' + message['content'] + '\\n\\n'}}{%- endif %}{%- endif %}{%- endfor %}{% if add_generation_prompt %}{{'### Response:'}}{% endif %}'''

@retry(wait=wait_random_exponential(multiplier=1, max=40), stop=stop_after_attempt(3))
def chat_completion_request_runpod(messages):
    """Send one conversation to the generation endpoint and return the generated text.

    The messages are rendered with the tokenizer's chat template, POSTed to
    ``tgi_api_base`` through ``curl``, and the "generated_text" field of the
    JSON reply is returned ("No generated text found" if the field is absent).
    The rendered prompt, rough throughput figures and the reply are printed.

    Args:
        messages: List of chat message dicts accepted by
            ``tokenizer.apply_chat_template``.

    Returns:
        The generated text, or ``str(error)`` when curl exits non-zero.

    Note:
        A curl failure is returned as a string, not raised, so it does not
        trigger ``@retry``. Retries only happen for exceptions that escape
        this function, such as a reply that is not valid JSON.
    """
    formatted_messages = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    print(formatted_messages)

    json_payload = json.dumps(
        {
            "inputs": formatted_messages,
            "parameters": {
                "max_new_tokens": 500,
                "do_sample": False,
                # "repetition_penalty": 1.1, #can be useful for json, less so for yaml.
            },
        }
    )

    # argv list instead of a shell string: no shell quoting to get wrong.
    # The body goes in on stdin ("@-") so it is not limited by the maximum
    # command-line length; --data-binary sends it unmodified.
    curl_command = [
        "curl",
        "-s",
        tgi_api_base,
        "-X",
        "POST",
        "--data-binary",
        "@-",
        "-H",
        "Content-Type: application/json",
    ]

    start_time = time.time()  # Start timing

    try:
        completed = subprocess.run(
            curl_command,
            input=json_payload.encode("utf-8"),
            check=True,
            stdout=subprocess.PIPE,
        )
    except subprocess.CalledProcessError as e:
        print("Unable to generate ChatCompletion response")
        print(f"Exception: {e}")
        return str(e)

    response_time = time.time() - start_time  # Calculate response time

    raw_reply = completed.stdout.decode()
    response = json.loads(raw_reply).get("generated_text", "No generated text found")

    # Rough throughput estimate, assuming about 4 characters per token.
    tokens_generated = len(response) / 4
    tokens_per_second = tokens_generated / response_time if response_time > 0 else 0

    # Print time taken and tokens per second
    print(f"Tokens generated: {tokens_generated:.2f}")
    print(f"Total Time Taken: {response_time:.2f} seconds")
    print(f"Tokens per Second: {tokens_per_second:.2f}")
    print(response)

    return response

/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: 
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
  warnings.warn(

tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/491 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


## Instantiate a new configuration
# project_dir is the root for the input, schema and output paths below;
# output_dir is appended to it when the result is written.
project_dir = "/content/drive/My Drive/data_extraction" # Adjust if using Google Drive
output_dir = "/outputs"

config = Config(
    chunk_length=8000, # Customize this as needed
    output_format="json", # or "yaml"
    output_file_name="output.json", # Adjust based on your preference
    batching=True,
    input_file_name=f"{project_dir}/input_files/berkshire23_60k.txt"
)

# Define schema: both the JSON and the YAML schema files are loaded here
json_schema_file = f"{project_dir}/json_files/json_schema.json"
yaml_schema_file = f"{project_dir}/yaml_files/yaml_schema.yaml"
json_schema = read_schema(json_schema_file) # Adjust location as necessary
yaml_schema = read_schema(yaml_schema_file) # Adjust location as necessary

# Create prompts, one per supported output format
json_extract_prompt = create_extract_prompt(json_schema, "JSON")
yaml_extract_prompt = create_extract_prompt(yaml_schema, "YAML")


# Read input file
text = read_text_file(config.input_file_name)


# Select the prompt that matches the configured output format
prompt = json_extract_prompt if config.output_format == "json" else yaml_extract_prompt


# Split text into chunks of chunk_length characters (the last may be shorter)
block_size = config.chunk_length
chunks = [text[i : i + block_size] for i in range(0, len(text), block_size)]


import concurrent.futures

# Define a function to send a request
def send_request(message):
    """Request a completion for one message; return ``(response, message)``.

    The message is wrapped in a single-element conversation before it is sent.
    """
    return chat_completion_request_runpod([message]), message


# Define a function to process the chat response
def process_chat_response(chat_response, output_format):
    """Parse one model reply and feed it to the module-level ``aggregator``.

    With ``output_format == "json"`` the reply is parsed as JSON and passed to
    ``aggregate_json``; otherwise it is stripped, parsed as YAML and passed to
    ``aggregate_yaml``. A reply that cannot be parsed is reported and counted
    as a failure on the aggregator.
    """
    try:
        if output_format == "json":
            parsed = json.loads(chat_response)
            aggregator.aggregate_json(parsed)
        else:
            parsed = yaml.safe_load(chat_response.strip())
            aggregator.aggregate_yaml(parsed)
    except (json.JSONDecodeError, yaml.YAMLError):
        print(f"Invalid {output_format.upper()} in chat response: {chat_response}")
        aggregator.fail += 1


# Create the aggregator that process_chat_response() writes into. It matches
# the configured output format, the same way the prompt is selected.
aggregator = (
    JsonAggregator(json_schema_file)
    if config.output_format == "json"
    else YamlAggregator(yaml_schema_file)
)

# Create messages: one single-message conversation per text chunk
message_lists = [
    [
        {
            "role": "user",
            "content": f"""{prompt}\n\n[TEXT_START]\n\n...{chunk}...\n\n[TEXT_END]\n\nNow, answer immediately and only in {config.output_format} format.""",
        }
    ]
    for chunk in chunks
]

if config.batching:
    # Counts requests that returned without raising
    request_counter = 0

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Send the requests in parallel
        future_to_chat_response = {
            executor.submit(send_request, messages[0]): messages for messages in message_lists
        }

        # Responses are aggregated on this thread, in completion order
        for future in concurrent.futures.as_completed(future_to_chat_response):
            messages = future_to_chat_response[future]
            try:
                chat_response, _ = future.result()
            except Exception as exc:
                print(f"{messages[0]} generated an exception: {exc}")
            else:
                # Increment the counter
                request_counter += 1

                # Process the chat response
                process_chat_response(chat_response, config.output_format)

    print(f"Total number of requests: {request_counter}")

else:
    for messages in tqdm(message_lists):
        chat_response = chat_completion_request_runpod(messages)

        # Process the chat response
        process_chat_response(chat_response, config.output_format)

# Write the aggregated data to a file. The name gets an extension if it has
# none, and the output directory is created first so the write does not fail
# after all requests have completed.
output_path = (
    f"{project_dir+output_dir}/"
    f"{check_output_file_format(config.output_file_name, config.output_format)}"
)
os.makedirs(os.path.dirname(output_path), exist_ok=True)
aggregator.write_aggregated_data(output_path)

# Report the validation error rate. The zero-attempt case is checked first so
# it is reported as such, not as "All validations failed".
total_attempts = aggregator.success + aggregator.fail
if total_attempts == 0:
    print("No attempts were made, so the error rate cannot be calculated.")
elif not aggregator.success:
    print("All validations failed")
else:
    error_rate = aggregator.fail / total_attempts
    print(f"Error rate is {error_rate}")

<s>GPT4 Correct User: Extract names and organizations from the provided text, and return them in JSON format. Use the following schema:

{
    "type": "object",
    "properties": {
        "names": {
            "type": "array",
            "items": {
                "type": "string"
            }
        },
        "organisations": {
            "type": "array",
            "items": {
                "type": "string"
            }
        }
    },
    "required": [
        "names",
        "organisations"
    ]
}

Here's an example of a response in JSON format:

{
    "names": [
        "sample_string_1",
        "sample_string_2"
    ],
    "organisations": [
        "sample_string_1",
        "sample_string_2"
    ]
}

Do not include anything that is not explicitly mentioned in the text. Analyse the text carefully to ensure all requested data is extracted. Include each name and organization only once. Adhere strictly to the response format without adding extra spaces or text.

[TEXT_START]

...we are here live in Omaha Nebraska good morning everybody I'm Becky quick
along with Mike santoli and in just 30 minutes time Berkshire Hathaway chairman and CEO Warren Buffett's going to be
taking the stage with his vice chair Charlie Munger the legendary duo will also be joined by berkshire's two other
Vice chairs Greg Abel who manages the non-insurance operations for the company and Ajit Jain who runs all of the
insurance businesses and as always it's pretty big crowd here lots and lots of people and a few people you might notice
too Tim Cook is here Apple of course is still berkshire's largest holding big big part of its portfolio there you see
him backstage getting ready to go out and take his seat he gets to sit down in the special seats by the way that's
Debbie pasonic Warren's assistant who's standing by just went bite beside him also in the crowd Bill Murray he has
been here for a couple of days been hanging around you can check out he is taking a seat right now too some other
big people who are expected to be here some well-known names Jane Frazier who's the city CEO Ruth parat of alphabet
she's actually here I did and see her a little earlier and by the way this was a scene just a short time ago as the
Berkshire shareholders who were first in line started streaming onto the floor Mike this is like um Black Friday used
to be yeah day after Thanksgiving people waiting lining up these were the doors
um yeah it was a big rush they opened and everybody comes running in and it's not just because of the rain outside absolutely if it's a massive Arena
Convention Center so people know and uh I don't know what time do they start lining up I looked out the window at 5
30 and I thought huh that's not as big of a crowd as I've seen in years past but then I realized it's because they
changed the setup for this people used to stand about 10 Deep right at the door this year they pushed everybody the
crowds down the the block because they I guess they didn't want too much of a mob scene in one place if you watched it
went for blocks I didn't realize it till I went to the other side of the hotel to see it went for blocks and blocks and I've honestly never seen a line this
long a lot of people around the convention center talking about how it's it's buzzier and more crowded in the
exhibit all this year versus last year which was the first after the pandemic of course they broke for two years so it
seems like there is a little more of a interest in uh in being here in person well they know how to time it there's a lot of news that's happening um usually
you get about 40 000 people who are here this year they've been running ahead just in terms of the number of tickets
that they gave out and I talked to Warren Buffett briefly last night he said that they had 6 000 people who
showed up at will call yesterday to try and get last minute tickets they ran out of tickets they had to go print more to
try and get people in the doors and they've never seen anything like that and as you mentioned the stuff sold out
if you if you look at some of the places that are back here we're going to talk about some of these companies but he's been running the numbers and seeing what
happened I I won't say too much now but he's been tabulating all of this up to see exactly what their sales are running
like here versus uh years fast obviously this is sort of a weird microcosm of the
of the business in the company as a whole and it's uh it's it's not Revenue neutral as he always talks about it
looks you're paying you're paying to play here you are in fact it is going to be a big day here first though we should
talk about the news at hand Berkshire is out with first quarter earnings and that came just moments ago Mike has been
digging through all the numbers on this and there's some stories to be told here there are first of all a big swing to the upside in the overall reported
earnings number uh from about five and a half billion uh dollars in the first quarter last year to 35 and a half
billion but almost all that swing was the The Mark to Market on the Investment Portfolio operating earnings though is
still a good story up almost 13 percent you know over a year up to about a little more than eight billion dollars
um it seems like the insurance business specifically Geico swinging to a fatter
underwriting profit from last year we could talk about exactly how they got there but it seems as if higher pricing
less advertising Revenue they they went from margin as opposed to Pure market share it seems at this point also on the
investment side uh seems like there was a reduction in the Chevron stake uh over
the course of the quarter you have to back into the numbers based on the dollar value of the stakes that they give you in the share prices at the time
but essentially it seems like he Berkshire was a seller of about 20 percent of that stake it's like oh
that's a big six or seven billion uh dollars worth okay and um so it's still a significant holding I think it's also
keep worth keeping in mind Chevron stock was up over 50 last year so simply by the market appreciating the the dollar
value went up fair bit well it's interesting though if they were selling some of that steak while they were building the oxy stake uh the accident
petroleum stake um not a call necessarily on oil overall just maybe
picking exactly relative value or or positioning uh within that uh also
Berkshire a net seller of overall Equity uh Securities in the quarter but the
majority of that net reduction seems to be the Chevron I think the rest of it's hard to know Apple pretty much unchanged
of course the Stock's up a lot but the position is unchanged uh Bank of America also seems unchanged too so those are
two not a lot of change in terms of the key core holder for Bank of America that's the one bank that he has kept he
sold out of a handful of other Banks Banks some of them that he's held for a very long time so Bank of America still
seems to be his favorite maybe we'll hear more about that today you would hope yeah and just maybe General thoughts about his assessment of the
banking system and uh whether it needs help whether it looks like it's an opportunity you have all these
valuations that have been crushed another thing to keep in mind uh for the quarter there was four and a half
billion dollars of shares bought back by Berkshire so that was up from 3.2
billion in the year ago quarter it's not an enormous number in terms of the market value of Berkshire which is 700
billion dollars it's like a you know what 18 billion dollar annual run rate of share buyback but they do reduce the
outstanding shares a net reduction of shares down about 1.2 percent uh year over year so you know there were
questions that came in that I've been looking through shareholder questions for a couple of months now that have been coming in on this and some of the
questions that came in is do you still like Berkshire Hathaway do you still like the stock at this price are you
going to be as aggressive as a buyer and I guess this answers some of that question some of it yeah it's it's absolutely not super aggressive uh but
it's it's it's sort of soaking up some of the shares that are out there and of course Buffett in his shareholder letter
was very vociferous about defending the practice of of being able to buy back
stocks so he does it in a disciplined way he wants to be careful about why he's doing it and what the valuation is
but uh clearly he's willing to use that tool oh the cash went up to 130 billion
dollars so up marginally total cash holder nice Pocket Change um there had been a lot of questions
that came into about the insurance company specifically Geico and you can see the gecko right now that's right shoulder uh people just wondering um
what's happening and I guess we'll dig into that a little deeper today and we've got some numbers that you've been going through be very interested to hear
the color on the strategy behind i...

[TEXT_END]

Now, answer immediately and only in json format.<|end_of_turn|>GPT4 Correct Assistant:
<s>GPT4 Correct User: Extract names and organizations from the provided text, and return them in JSON format. Use the following schema:

{
    "type": "object",
    "properties": {
        "names": {
            "type": "array",
            "items": {
                "type": "string"
            }
        },
        "organisations": {
            "type": "array",
            "items": {
                "type": "string"
            }
        }
    },
    "required": [
        "names",
        "organisations"
    ]
}

Here's an example of a response in JSON format:

{
    "names": [
        "sample_string_1",
        "sample_string_2"
    ],
    "organisations": [
        "sample_string_1",
        "sample_string_2"
    ]
}

Do not include anything that is not explicitly mentioned in the text. Analyse the text carefully to ensure all requested data is extracted. Include each name and organization only once. Adhere strictly to the response format without adding extra spaces or text.

[TEXT_START]

...t because there was a little bit of uh you know an issue last year where it seemed
like profitability was done now pricing is up across the industry yeah so policies are able to be written at
higher prices and so that's happening across the board but it does seem you know as I say in the commentary within the the 10q that they did reduce
advertising expense and it was a big swing to the upside and underwriting earnings I mean with insurance it takes
a while to raise pricing for anybody in the industry because you have to go state by state and get Regulators approvals before approvals before you
can actually raise any of it so there is a delay we saw some a huge hit to the profitability of all the insurers as you
know prices to replace cars prices to fix things construction all of that went up right and they couldn't raise their prices as quickly exactly so it is an
industry-wide phenomena but it seems as if uh Geico is trying to decide they want to skew toward more profitable
customers we'll see if that's a theme that's going to continue another couple of tidbits are building products and
consumers margin squeeze that's happening across the industry railroads pretty flat okay the BNSF pretty flat
year over year and a pretty big reduction in consumer related Freight uh loading who needs an analyst you've
already done all the work wow did the highlights I got my little you know tape bookmarks yeah that's good it works okay
we have a lot more to get to this morning we want to give you a quick look at today's schedule though Mike and I are going to be here with you until 10
15 a.m eastern time that is when Buffett Munger and the vice chairman Greg Abel and Ajit Jane are going to be taking the
stage you get to watch all of this the annual meeting you can see it exclusively here on CNBC and cnbc.com
Buffett's going to begin the meeting with the summary of the past quarters results but like we said Mike's already
done that for you so that's your bathroom break time then he's going to open the stage to shareholder questions
and I'll be asking some that have been emailed into me again we've gotten lots and lots of emails this year more than
I've ever seen Buffett's also going to rotate through the 11 microphone microphone positions that are in the
audience too so you'll see a lot of questions being asked around 1pm Buffett will break for lunch but you get to
stick around with us Mike and I will be joined right here by Berkshire board members Ron Olson and Howard Buffett we'll also be talking to Tech investor
Ann widblad and Activision Blizzard CEO Bobby kodek who has been coming to this meeting for years he was here last year
but had to leave early because he was going to a birthday party so he left I think at the lunch break after that is
when Buffett revealed to the crowd that he had taken a stake in Activision Blizzard so Bobby found out when he was
on a plane on his way back home so he's here this year too and we'll find out if there's any new news on that at the time
of course it was viewed as an Arbitrage position because Activision agreed to be sold to Microsoft for cash and Buffett
said it was an Arbitrage position yeah exactly the spread was very wide it seemed just like the market was leaving money on the table now that it looks
like perhaps Regulators in the UK might block that deal the question is does it remain you know on a fundamental basis
yeah that would be fascinating to hear the commentary on that and then at 2 p.m Eastern Buffett and Munger will be back
on stage for another two and a half hours of shareholder q a after the afternoon session wraps we'll be back
with you to recap all of the day's action while the Market's staging a big rally
to close the week after a strong job support and revisions to the February and March employment numbers Apple a
major Berkshire holding which was unchanged in the quarter a key driver for yesterday's rally the stock closing
Higher by about four and a half percent uh on the day after posting better than expected earnings Apple now up nearly 34
percent so far this year here to help us navigate the current market environment and look at opportunities right now as
John Rogers he's the chairman and co-ceo of aerial Investments and a long time Berkshire meeting attendee John good to
see you thanks for stopping by here great to be here um what's your uh I guess what you what are you looking to hear from Mr Buffett
and Munger uh both about I mean we know that the principles are going to accentuate that they always do about how
they do their business and and what matters and doesn't matter in the uh in the earnings and how they approach things but what about the moment right
now where it seems like there's big big questions about financial system stability whether there's value in in
Bank stocks whether the economy can handle these rate hikes a couple things that I'm going to be looking for one you
know when Becky interviewed Warren when he was in Asia and there was talk and Warren talked about how certain of his
companies were not meeting expectations earnings were going to be less than expected and profits were lower than expected and so I want to see if that
trend has continued are is he still seeing weakness in the overall economy and the second thing you know Becky
followed up the question about Paramount Global and he was kind of a little bit soft and not as aggressively supporting
his position as I had expected so I'm curious to see what he has to say today about it especially after the stock just
located this week and you are an owner of Paramount correct we are an owner of Paramount Global assist it's not been a
fun week yeah how do you feel about things I mean Paramount slashing the dividend um I I think it caught some people by
surprise but Mario gabelli was here yesterday and he said it didn't catch him by surprise he wanted them to do that because he wanted them to shore up
their cast position and put it back into their business at this point but I think it was the right decision and I watched
Mario yesterday morning and then we did a panel and he has been very positive about the future
for Paramount Global so we're still very very optimistic we think they have so many assets around the world they've got
that great Paramount Library they've got Mission Impossible coming out this year all the great Sports entertainment that
they have and all-time favorites like 60 minutes so I think there's more there than just the streaming and I think
sometimes people are more worried than they need to be is it more there as a standalone business or do you think that this is an acquisition an acquisition
Target and that I mean Mario belly has always been about you know business of Love Making with mergers and
acquisitions you're right I think that everyone as we talk to experts in in the industry everyone says there's way too many
streaming services we've got to get it down to three four at the maximum so I think it's very possible that Paramount
Global will be bought at some point as a value manager do you look at the Carnage
in things like Regional Banks and anything that seems like it's it's connected to a commercial real estate and view that as uh more of a core risk
to the economic Outlook or is there actual value being being surfaced in the process well I think the stocks have
gotten crushed there's a lot of pessimism and Warren often says you want to be greedy when others are fearful so I think if you're going to buy the
banking stocks you'd buy a whole basket of them not try to pick one but really be Diversified we've been also adding to
our favorite Northern Trust is our favorite Bank stock so I I think we're going to be okay uh it's going to be
some pain and anguish here and of course commercial real estate is getting harmed and it doesn't help having the banks and
the problems they are but yeah down the road we'll be fine I've been talking to folks who here in the last day and there is a little bit of s...

[TEXT_END]

Now, answer immediately and only in json format.<|end_of_turn|>GPT4 Correct Assistant:
<s>GPT4 Correct User: Extract names and organizations from the provided text, and return them in JSON format. Use the following schema:

{
    "type": "object",
    "properties": {
        "names": {
            "type": "array",
            "items": {
                "type": "string"
            }
        },
        "organisations": {
            "type": "array",
            "items": {
                "type": "string"
            }
        }
    },
    "required": [
        "names",
        "organisations"
    ]
}

Here's an example of a response in JSON format:

{
    "names": [
        "sample_string_1",
        "sample_string_2"
    ],
    "organisations": [
        "sample_string_1",
        "sample_string_2"
    ]
}

Do not include anything that is not explicitly mentioned in the text. Analyse the text carefully to ensure all requested data is extracted. Include each name and organization only once. Adhere strictly to the response format without adding extra spaces or text.

[TEXT_START]

...uspense as to
whether in fact clearly we'll get some questions on this whether uh Buffett himself sees there to be any
opportunities here to provide capital in areas where it's like I mean it's worth going back to how he acquired a lot of
his financial positions right American Express 60 years ago coming off a crisis that he needed Capital Bank of America
clearly Solomon Brothers in the early 90s uh Goldman Sachs back around the crisis too so in other words he's been
there as a source of stable Capital at times when the financial system seemed like it was in trouble would you want to
see him make a move like that that would bring a lot of confidence to the economy and to the financial system whenever he
steps up all of us believe in him so much so I think it would be great if he was able to be helpful during this
period and get a great return for Berkshire shareholders
involved with conversations on these things are there deals that he saw and passed up on you know maybe we'll get a
little color into some of that today I'm hoping that you know maybe that's why there's so many people that are here
today too John we're going to welcome our television audience in in just a second but we want you to stay with us
and I'm doing this because literally the TV clicked so I'm just hold your thought for just a second we want to make sure
we are welcoming our audience in fact you're going to hear a bell in just a second yeah ding ding ding there it goes that
sound that you're hearing right now me doing ding ding ding it means that it's 10 a.m eastern time and that means that
it is time to welcome our television audience as well we are again at the Berkshire Hathaway annual meeting and uh
yeah okay want to welcome our television audience here and around the world I'm Becky quick I'm here with Mike santoli
and this is our live all-day coverage of the 2023 Berkshire Hathaway annual meeting we are just 15 minutes away from
the start of the action I want to get back to our conversation with John Rogers he's the chairman the co-ceo and
the co and the CIO of aerial Investments and for everybody on television this is a conversation we've been having with
our streaming audience at cnbc.com so let's just pick it up with where we left off John you were just talking about how if you saw Buffett step in and do some
sort of a deal whether it be in the banking system whether it be something that kind of showed some confidence in
the system that would mean a big deal for investors everywhere I think it would be he is so revered as we know
around the world and I know that the by the Administration has been talking with him and I know you know other leaders
are and you can bet that he's the First Call of Many of the major banking Giants on Wall Street to make sure they're
getting his best advice and including him in these important conversations it's probably also worth keeping in mind that what he says about his take on
whether in fact the banking system is sound whether whether the market is over
overshooting and it's uh attack on some of these Regional Bank valuations it's worth listening to as well if he thinks
the FED has it right we have this deposit backstop facility and you know let's keep in mind one of the reasons
that the bank stocks are going down is consumers are in great shape they got a lot of cheap mortgages unprofitable for
the banks they're looking at five percent money market yields they can move their cash there so it's it's it'll
be interesting to hear whether he thinks it's an economic risk or it's just kind of a sector that's upside down for the
moment well as you know he's such a long-term investor and I think he's going to uh you believe as he always
talks about on at the annual meeting about how last century all the problems that confronted our country we always
resolve him our capitalist democracy is the best system ever invented so this crisis will pass also I also noticed
when I love watching CNBC in the morning when you have those special boxes showing returns like now even showing them Regional Banks how they're doing
that's a sign that you're getting toward a period where you know six months from now that'll be a old forgotten story and
we'll be on to something new but I love seeing that because it gives you a sense of what's important today John you our
long-term investor too you're a value investor you don't often get swayed by things that are happening in the
immediate but I have to think that some of the things you've seen recently have been enough to make you sit up and take
notice is there anything you've changed in the portfolio as a result of the potential credit crunch as the result of
just watching money move quickly out of places does it change your mind or change your investors thesis at all for
the short term the only thing that sometimes happens like right now the housing the markets have been very weak higher interest rates have been
Troublesome and problematic for the Housing Industry so we've been leaning in in some of the suppliers to the
Housing Industry some of our favorites like Leslie's pools for example and residio and people that are creating
products for the Housing Industry we think it's been overdone and there's real Bargains there you know there's been a lot of commentary and to some
degree a bit of angst about how uh narrow the index performance has been at
least recently for times this year we talked about Apple's performance a huge sway on the index is a handful of stocks
really carrying the S P 500 where does that leave you as somebody who looks you know for smaller and and less expensive
less popular stocks at the moment you know you know I've been fishing in the small and mid-cap Value World for 40
years and I really think there's more opportunity than ever there's these orphan stocks neglected stocks and no
one's looking at because they've been so focused on these giant great giant companies that have done so well
what about Berkshire itself in terms of How It's positioned I mean on the one hand it's kind of Eternal right the way
it's structured and who it's managed by but on the other I mean if you look at what's in favor right now you know aside
from Apple which is you know 20 of the market value of Berkshire right now uh it's taken it is uh you have you know
Consumer Staples exposure you have real Capital assets that people seem to be wanting to have pricing power
um and even on the insurance side they're able to actually make a profit interest rates being up perhaps a help to them do you see value in the shares
themselves at this level we do you know when Warren makes the decision to buy back stock the way he has he's so
conservative and so for him to make that bet it gives you a really great sense that he sees the company as really
undervalued and we would agree we think it's a great bargain here today do you have you been buying I own it
personally and I was too big for my small and mid cap value uh portfolios at aerial fund but uh I haven't added to it
in a while but that's a good suggestion John thank you very much we always enjoy
seeing you here and we really appreciate your time this morning John Rogers from aerial Investments
all right Berkshire Hathaways 11.6 billion dollar acquisition a year ago of the property and casually ensure
Allegheny brought another diverse portfolio of Brands Under The Berkshire umbrella including one of the fastest
growing toy makers in the U.S that's Jazz Wares they make all those viral squishmallows and here at the meeting
they're actually selling special Warren Buffett and Charlie Munger squish millos this weekend they come in eight inch
size and 16 inch versions this is the first time they've ever made squishmallows with a real human space on
them we actually caught up with the company CEO and chief commercial officer Judd and Laura zaberski
we acquired a company called kelly toys okay and they were making squish Mallows it's been around since 2017 and we were
fortunate...

[TEXT_END]

Now, answer immediately and only in json format.<|end_of_turn|>GPT4 Correct Assistant:
<s>GPT4 Correct User: Extract names and organizations from the provided text, and return them in JSON format. Use the following schema:

{
    "type": "object",
    "properties": {
        "names": {
            "type": "array",
            "items": {
                "type": "string"
            }
        },
        "organisations": {
            "type": "array",
            "items": {
                "type": "string"
            }
        }
    },
    "required": [
        "names",
        "organisations"
    ]
}

Here's an example of a response in JSON format:

{
    "names": [
        "sample_string_1",
        "sample_string_2"
    ],
    "organisations": [
        "sample_string_1",
        "sample_string_2"
    ]
}

Do not include anything that is not explicitly mentioned in the text. Analyse the text carefully to ensure all requested data is extracted. Include each name and organization only once. Adhere strictly to the response format without adding extra spaces or text.

[TEXT_START]

... enough to build the brand once we acquired Kelly toys because the I mean people compare this to things like
beanie babies or to the Cabbage Patch Kids where each of these dolls has a story that comes with it
um and you guys are holding the special ones you have for this meeting of Warren and Charlie the first time you've ever
put a human face on one of this yeah that's the first time so what they worked out pretty good okay but the story lines was that part of it too or
did you guys add to that yeah so every squishmallow has their own individual story and when we acquired Kelly toy we
saw that they had this almost diamond in the rough they were people were passionate about it it wasn't available
everywhere it wasn't available globally and then um the BIOS were always part of it and
what made them really special because kids and adults all identified with something in the bio and so Charlie and
Warren both got their own bio that represented them as people not just business people but the really
interesting individuals they are one of the things that I think is so amazing I mean these are cool plush toys but
they're plush toys they're squishy they have become the most popular toy brand in 21 states they beat out Nerf Plato
they beat out Nintendo switch and Hot Wheels how does that happen with the plush and have you guys been surprised
by how they've taken off of course yes it struck a nerve during covet um it was
this palliative type of feel so people that were sitting at home they bought more and more squish mouths and they
fell in love with the product we're the number one best-selling toy in the US and in many many markets and I think
what's interesting about it is our demographic it's not just kids it's adults it's grown women it we did a
squish Squad tour and we went to major cities and we looked at the line and we said wow there's not that many children
in the line and that's the interesting part people find it emotionally supportive [Music]
um and that was a big surprise to us yeah people in their 50s are buying our squish Mellows wow that that's very
unusual in the greatest and when you found out you were being acquired by Berkshire Hathaway your thought was
wow wow and and what's happening uh dream come true yeah if you would have
asked us 26 years ago if this would ever ever happen we would say no way but
anything's possible in America well the interesting part is Berkshire didn't keep all the companies I think there
were some they kind of got rid of they saw you guys and they chose to keep you which we were you know we I remember
last year we were watching we knew the transaction was happening we watched the
annual shareholders meeting and we said are we going to be there next year
well in fact um I think they probably are going to be here next year I think the question is how much of the floor space they're
going to get to negotiate because those have been hot cells yeah they do um we weren't the only ones who caught
up with them though we were actually there a little later when Warren Buffett and Charlie Munger came by to visit the Jazz Wares Booth right here in the
exhibition Hall and here's a funny coincidence for you both of them both Laura and Judd were lawyers before they
had this career that's something they had in common with Charlie Munger too and by the way
um they were sick of it too they wanted a job that was a little more fun
did you know they're both reformed lawyers yes they both started out as lawyers well I'm ready to see a reform
lawyer every time
going through purchase orders yes Charlie I would like to see about 90 of
the lawyers being reported this is a pinch me moment for us because we remember you know Judd started the
company by himself no financing no money no backing and just did it and worked
hard and of course we had many bumps in the road many many and it makes it that
much sweeter and we appreciate it and we never forget how hard it was and we're much smarter today than we were
yesterday I learned a lot I'm glad you were part of alligators
you can catch more of our interview with Judd and Laura zaberski of jazz Wares and some other sounds from inside the
Berkshire Hathaway annual meeting in a special series of cnbc's daily podcast it's called squawk pod and you can just
scan the QR code on your screen to go ahead and follow squawkpod you can check it out and listen anytime
all right well joining us now is the CEO of Benjamin Moore Dan Calkins Benjamin Moore has been part of the Berkshire
portfolio for more than 20 years Dan uh good to have you great to be back I hope
you're enjoying the weekend so far um eventful few years since you've been running the business yes uh for
Berkshire for at Benjamin Moore uh housing boom in conjunction with the pandemic then a little a downturn in in
home related investment uh and Improvement where do things stand right now we got the inflationary pressures
you've had to kind of deal with and and pass through so what's your kind of snapshot of the business right now well
we're in a great spot we're actually celebrating our 140th anniversary this year at Benjamin Moore so we've been
through Cycles like this before obviously we'll be through this cycle but it has been uh what we call Benjamin
Moore Chasing The New Normal because we had explosive growth in 20 and 21 took a
little bit of a step back in the fourth quarter last year got off to a slow start this year but at the end of March
we start to see light we're going into the paint season and people people are seeing starting to paint again and we're
seeing that in our numbers on a regular basis obviously Big Driver for us is housing turnover and with interest rates
High people not moving as much that has an impact on our business so as as mortgage rates hopefully start to come
down we'll see more active moving and that drives to more sales for us and and just fixing things up when you cut a
paint we can't or we can't move so yeah exactly yeah you'll have people doing some remodeling if they're not going anywhere but the Big Driver is the churn
somebody moves they pay every room every room and in terms of supply chain I mean
Paint and Coatings were particularly dramatically impacted yes you know during the the pandemic with a lot of
the components there short supply where does it sit right now and if you retain pricing I was just looking in aggregate
in the in the quarterly report that you know there has been some margins Queens across the building products uh
businesses in total which is not suspicious right no we we definitely felt that we're in a good spot now
um early last year we had about 65 days of inventory on end our historical average is about 95 days on hand I'm
happy to say at the end of March we're at 96 days on hand so we're back to our historical Norms from an inventory
perspective we have been able to take pricing over both 21 and 22. so uh while
we've felt some of the pressure and compression last year that's evening out now we're recognizing the pricing we
took last year in our sales this year so things are good and so that implies that you haven't seen consumers sort of trading down or
anything like that when it comes to pain yeah no we have we have some particularly on the DIY side we've seen
some of our product mix shift from some of our more premium products to more of our mid-range products uh but we have a
full assortment full of portfolio that can meet all those price points that are out there and so we're we're seeing some
of that happening right now and with margin shrinks when somebody trades down how much does it's maybe six seven
points on from a you know premium product to more of a commercial type product but we have healthy margins at
Benjamin Moore we have one of the things Warren life's best about Benjamin Moore is that premium position we carry you
know there there's a big picture uh sort of long-term structural bullish story about home ownership actually
homeownership rates uh ticking highe...

[TEXT_END]

Now, answer immediately and only in json format.<|end_of_turn|>GPT4 Correct Assistant:
<s>GPT4 Correct User: Extract names and organizations from the provided text, and return them in JSON format. Use the following schema:

{
    "type": "object",
    "properties": {
        "names": {
            "type": "array",
            "items": {
                "type": "string"
            }
        },
        "organisations": {
            "type": "array",
            "items": {
                "type": "string"
            }
        }
    },
    "required": [
        "names",
        "organisations"
    ]
}

Here's an example of a response in JSON format:

{
    "names": [
        "sample_string_1",
        "sample_string_2"
    ],
    "organisations": [
        "sample_string_1",
        "sample_string_2"
    ]
}

Do not include anything that is not explicitly mentioned in the text. Analyse the text carefully to ensure all requested data is extracted. Include each name and organization only once. Adhere strictly to the response format without adding extra spaces or text.

[TEXT_START]

...r still even through this tough period of the economy and this demographic wave
that's washing over but how are you thinking about you know expansion plans your ability to capture a lot of that
yeah so we have been growing and expanding banding not just here in North America we've been very aggressive
particularly in the UK and then moving into Western Europe which is still a small part of our business but we
haven't felt the impact over there because it's New Territory for us so we have explosive growth going on over
there right now um very exciting and then here long term you know you to your point about families being formed that's
happening with this generation that's coming into the housing market and we just think this is a blip through this period of time and as that changes we're
prepared to to service what we need to guys if you take a look you'll see this is live footage Warren Buffett is making
his way out to the stage right now Charlie Munger will be joining him as well that means they're getting about to
the end of the movie they have a long movie 45 minute movie that they show to the shareholders that's what they're
seeing right now and there you see Charlie Munger as well so they're being taken out the stage that means things are about to kick off there so I'm going
to take my lead that also means you need you have a place to go okay Mike thank you I'll see you back in just a little
bit thanks so much thank you all right
well Insurance of course one of the big story lines in berkshire's earnings this morning joining me now to discuss is
evercore's David modematan he has got a buy rating by ratings on the likes of
Travelers Progressive and Chubb uh uh from evercore isi and uh and Dave thanks
for joining us uh this morning we were just talking about how over the first quarter berkshire's Insurance businesses
specifically Geico did make some moves on pricing uh kind of fattened up the margins reducing advertising spend uh it
seems like maybe an industry-wide Trend but what's been happening overall in the competitive uh landscape when it comes
to those Insurance lines yeah thanks for having me um so you are seeing just a massive hard
Market in the personal auto insurance space and Geico has actually taken you
know fairly aggressive action on that front um and you can see that you know the focus on profitability is really
coming through in in these results where their policies enforce shrank 13 and you
know that's partly because you know there are premium per policy which is a good proxy for their price increases you
know is up 15 uh which is you know among the highest that I've seen you know
Allstate was up 16 Progressive was up you know close to that level so the entire industry is Raising price
um for guy for Geico it looks like they've actually seen you know the their
units come down quite a bit and that's resulted in them having better frequency Dynamics which is you know really help
their earnings and you know another thing I've noticed is um you know they really cut their ad spend to really
focusing on you know what what people they're underwriting to focus on profitability
I appreciate that color uh David that was uh that's great we're going to hear a lot more about that in the Q a in the
shareholder meeting and we're going to actually get to Becky and Warren Buffett and Charlie Munger for Q a we'll see you
back here at 1pm Eastern time when Buffett and Munger break for lunch ahead of their afternoon q a session and the
official shareholder meeting they are taking the stage uh right now as you can see and we go there live morning good
morning and thanks for coming I love it Charlie loves it we're glad to
have you here we're going to make this this uh
preliminary before the question is very short because we want to get in at least
60 Questions half divided by the the audience outside this
Arena and half from you so I would just
like to get right to the to the directors and the earnings that
have been put up on the uh on our webpage this morning but we'll
cover those very fast and we'll get to the questions now I
when I woke up this morning I realized that
we had a competitive broadcast going out
uh somewhere in the UK and and
they were they were celebrating a King Charles and we've got our own
King Charles here today [Applause]
and next to him we have Greg Abel we was in charge of all the
operations except for insurance next
[Applause] [Music] [Applause]
and next to Greg we have uh a man I ran into a 1986 and
has made us look good ever since we have the man in charge of insurance Ajit Jane Jaden
and now we have our our directors here in front and if they would just stand briefly and then I'll go on
to the next one and and uh they're all here today first of all
doing alphabetically there's Howard Buffett
there's Suzy Buffett
there's Steve Burke
Ken Chenal
Chris Davis
Sue Decker Charlotte Diamond
Tom Murphy Jr Ron Olsen
Wally Weiss and Merle Whitburn
that's as good as you can get and there's one other person I would like to mention before we
get onto the earnings that were put on the uh in the press release this morning and
uh uh that's uh uh
well let's see who we have here we've got this is hard to believe
can you imagine a name Melissa Shapiro Shapiro
and she was uh Melissa Shapiro Joseph married another Shapiro and she put this
whole thing together with no help from me no help from Charlie and a lot of
help from the people of the other room Melissa
yeah it's very easy if you can remember her second name you can remember her third name so Melissa shabiro Shapiro
and with that I would like to next move on to the
earnings and a couple small slides that that explain what
we're all about and then we're going to get to the Q a and uh the slide is up behind me yeah there it
is we reported in the first quarter
operating earnings a little over 8 billion and when we talk about operating
earnings we're basically referring to the earnings
of Berkshire Hathaway as generally well as required under gaap excluding however
capital gains both realized and unrealized there's a few other very minor items but basically
we expect to make capital gains over time why would we own the stocks otherwise uh doesn't always work out but
overall it works out pretty well over time but in any day any quarter any year
even occasionally over a five-year period uh the stock prices move around
capriciously now we own a lot of other businesses we consider those stocks businesses we own a lot of other
businesses where they get Consolidated and they don't move around in value now if we had a little bit of Burlington
stock outstanding if we had a little bit of the energy stock trading those stocks
would move around a lot but the businesses are what count so the
operating earnings as you'll see in the first quarter came in at about 8 billion
and I would say that in the general economy
the feedback we get is that uh I would say perhaps the majority of our
businesses will actually report lower earnings this year
than last year the in various degrees in the last six months or so at various times
uh the the businesses have left the incredible
period which is about as extraordinaries I've seen in business since World War II
uh with the government would pour out a lot of money to people who couldn't get
Goods it was more extreme in World War II but this was extreme this time and it was just a question
of getting Goods the deliverer and people bought and they didn't wait for sales and if
you couldn't sell them one thing they would put another thing in their backlog it was an extraordinary period and that
period uh has ended it hasn't ended with as you
know it isn't that employment's fault or off a cliff or anything in the least but
but uh it is a different climate than it was six months ago and and a
number of our managers uh were surprised some of them had too much inventory on order and then all of
a sudden it got delivered and people weren't in the same frame of mind as earli...

[TEXT_END]

Now, answer immediately and only in json format.<|end_of_turn|>GPT4 Correct Assistant:
<s>GPT4 Correct User: Extract names and organizations from the provided text, and return them in JSON format. Use the following schema:

{
    "type": "object",
    "properties": {
        "names": {
            "type": "array",
            "items": {
                "type": "string"
            }
        },
        "organisations": {
            "type": "array",
            "items": {
                "type": "string"
            }
        }
    },
    "required": [
        "names",
        "organisations"
    ]
}

Here's an example of a response in JSON format:

{
    "names": [
        "sample_string_1",
        "sample_string_2"
    ],
    "organisations": [
        "sample_string_1",
        "sample_string_2"
    ]
}

Do not include anything that is not explicitly mentioned in the text. Analyse the text carefully to ensure all requested data is extracted. Include each name and organization only once. Adhere strictly to the response format without adding extra spaces or text.

[TEXT_START]

...er
and now we'll start having sales at places where we didn't need to have sales before but
uh despite the fact that this year I think in general will
be slower than last year we actually are
situated so that I would expect and believe me when I say accept expect
it's nothing is sure nothing sure tomorrow nothing sure next year and nothing is
ever sure either in markets or in business forecasts or anything else and we don't
pay much attention to markets or forecasts unless the markets happen to offer something interesting to do but
nevertheless we are positioned in two respects as you'll see from from this uh first report our investment
income is going to be a lot larger uh this year than last year and that's
that's built in I mean we have as you'll see in a minute we've had 125
billion or so and very short term investments and
and believe it or not uh not that long ago
we were getting four basis points which is next to nothing
on that 100 125 billion which means we were getting 50 million a year
and now the same money the other just day or day before yesterday we
actually bought because of some of the funny twist in the market because of doubts about the
deficit ceiling of the debt ceiling uh we bought three billion of bills of the
590 that's 5.9 there's a 5.92 bond
equivalent yield so we will have what produced us not that
long ago on a 12-month basis was producing 50 million a year producing uh something in the area of 5 billion
here so we're in a position where the investment income is essentially well it
is certain to increase quite a bit and insurance underwriting
is not it uh it does not correlate
with business activity it depends on things like hurricanes and earthquakes
and and other events so on a prospective basis on a probability
basis we're likely to have a a better year
uh this year in Insurance underwriting than we had last year uh it just isn't
affected by what you might call the business cycle or uh what applies to generally an
industry detailing you name it uh so I would expect
and one massive earthquake or one one hurricane that came into just
the wrong place uh would can affect that prediction but on a
probabilistic basis our insurance looks better this year so if you get two of those
two of the elements there of our main office of earnings that look like they
will swing in our Direction I would expect but I can't promise that our operating and earnings will be
greater than last year and if we'll move to
the second slide uh I give you those operating earnings
figures just to give you a overview of what has happened
uh since the pandemic started and off of the year
the yearbook before as a base and we retain all our earnings as you know
so if we're retaining 30 or 35 billion or whatever but maybe a year you should
expect more operating earnings over time I mean this this number should be
significantly higher five or ten or fifteen years from now because we have the advantage of retaining earnings and
that's what got us to these figures because they were essentially nothing when we started and they got there by
retaining earnings and we'll keep retaining earnings so it's no great Triumph if these numbers move up and
what we hope is that they move up at a reasonable rate historically
they moved up at an unreasonable rate sometimes but we're working with a much smaller sums then and that can't be
repeated with our present Capital base because I note there I believe it's on this slide let's take
a look uh now that'll be
see it's on the while on the on the next on the next
place paid and let's move to the next slide we showed that we had on March 31st
now 500 and was it five hundred and four billion
of Gap net worth now what might surprise you
is that there's no other company in the United States no other company that has a number
that is that large now that isn't because we've got the most valuable
company in the United States other companies have used their money to repurchase
shares they could have accumulated 504 billion in gap but basically we have more
under Gap accounting now than any other company in the U.S and of course if you
measure return on on Equity that becomes a very big number to increase
at a rapid rate but we hope to do so uh not a rapid rate additional rate
um and right below that you see something called float
and float is money that uh is left in our hands
uh somewhat Akin but very importantly different
than a bank deposit but it uh you have to pay interest to get a bank deposit and you have to pay more
interest these days and you have to run a bank and do a lot of things and basically this is money that represents
unpaid losses at this time you get paid in advance in insurance so what shows up
as a net liability on our balance sheet is gives us funds uh
do exercise with an amount of discretion that no other insurance company that I know of in the
world enjoys just because we have so much net worth and our float now
comes to 165 billion and the man sitting on the far left is
responsible for moving that number up from a pittance in 1986
to this incredible figure which in most years practically all years
hasn't cost us anything so it's like having a bank with no employees no
interest and no ability to withdraw the money in
a hurry that we have working for us and it's a
very valuable asset that uh that shows up as a liability and uh
Ajit is responsible for building up this
treasure uh which has done been done by out competing insurance companies all over
the world and then now a number of our insurance companies in turn are run by talented managers
who contributed one way or another uh start with Geico
the beginning of my career and the uh that float if you think about it just
think of a balance sheet you've got liabilities here and you got assets over here and and the liability side finances
the asset side is very simple and stockholders Equity finances it
long-term debt finances and so on but stockholders Equity is very expensive in
a real sense long-term debt has been cheap for a while but it can get expensive and it
can also become new eventually and it and it may not be available
uh but float is another item which shows it's a
liability but hasn't cost us anything and it it can't disappear in a hurry and
it finances the asset side in the same way a stockholders equity
and nobody else thinks of it much that way but but we've always thought of it that way and it's built up over time so
uh I show at the bottom what's happened with cash and treasury bills
through March 31st and I will tell you that the in the month of April
we probably added about seven billion dollars to that
factor now part of that is because we didn't buy as much stock because that reduces reduces cash and treasury bills
we bought about 400 million dollars worth of stock in the month of April that's that's a minus uh in terms of
cash available and uh uh we we however sold not some stock
which produced maybe four billion and of course we had operating earnings probably two and a half
billion or something in that area and my guess is we've probably increased our cash and treasury bills uh six and seven
billion in the month and uh I just want to give you a feel for how the cash
flows at Berkshire and then if we move to the final I think it's the final one next to the last one uh
no I I think it is a lot let's see it's the fourth yeah is this we should have the one up
there class a equivalent shares outstanding and and uh you'll notice that every year
the number of our shares go down so if we own more businesses and the businesses make more money
your chair as shareholders as owners a Berkshire increases every year without
you laying out any money now you're laying out the uh the
alternative which you could receive in dividends but the reason we've gotten to
where we are is because we we kept the money we did pay a dividend
in 19. 67 10 cents to share it was a terrible mistake and uh I
I always tell people that I I'd love for the men's room and the...

[TEXT_END]

Now, answer immediately and only in json format.<|end_of_turn|>GPT4 Correct Assistant:
Tokens generated: 26.25
Total Time Taken: 2.93 seconds
Tokens per Second: 8.95
 {
    "names": [
        "Ajit"
    ],
    "organisations": [
        "Gap",
        "Berkshire"
    ]
}
<s>GPT4 Correct User: Extract names and organizations from the provided text, and return them in JSON format. Use the following schema:

{
    "type": "object",
    "properties": {
        "names": {
            "type": "array",
            "items": {
                "type": "string"
            }
        },
        "organisations": {
            "type": "array",
            "items": {
                "type": "string"
            }
        }
    },
    "required": [
        "names",
        "organisations"
    ]
}

Here's an example of a response in JSON format:

{
    "names": [
        "sample_string_1",
        "sample_string_2"
    ],
    "organisations": [
        "sample_string_1",
        "sample_string_2"
    ]
}

Do not include anything that is not explicitly mentioned in the text. Analyse the text carefully to ensure all requested data is extracted. Include each name and organization only once. Adhere strictly to the response format without adding extra spaces or text.

[TEXT_START]

... directors voted while he's gone but that isn't true I
was there I confessed they uh but we're very invested and has produced the
500 billion plus of shareholders equity in the 30 billion plus of operating
earnings and and we'll continue to follow that policy because it makes a great deal of sense and uh
with that I think we've taken care of the preliminaries you can study that
the 10q is is on the uh on the uh web page and if you have a
week or two vacation you could spend it reading the 10q and but that is the
essence of Berkshire and with that I will start with Becky quick and and uh we
will alternate between Becky and and the audience and her questions have come in
from all over the country and I believe you identified the sender and go to it Becky thanks Warren the first question
comes in from Randy Jeffs in Irvine California and his question is if Silicon Valley Bank's deposit had not
been fully covered what do you think the economic consequences would have been to the nation
simply say it would have been catastrophic and that's why they were covered uh and
even though the FDIC limiters two hundred and fifty thousand dollars that that's the way the statute reads
but that is not the way the us is going to behave uh any more than they're going to let
the debt ceiling uh uh cause the world to go into turmoil and uh they
uh well they're just I can't imagine
anybody in the administration and the Congress and the Federal Reserve whatever it may
have been FDIC I can't imagine anybody's saying uh I'd like to be the one on
television tomorrow and explain the American public why we're keeping uh
uh only two hundred and fifty thousand dollars insured and we're gonna start around every Bank in the country and
disrupt the world financial system uh so uh I think it was inevitable Charlie The
Amity no I have nothing to add
apparently I should mention this now ajeet and Greg will be here in the
morning session which ends at noon and so uh it's got questions to direct to
them the time to do it is in the first half of the show and and then after lunch
that'll just Charlie and I will be back okay
area one hi nirav Patel Haverhill Massachusetts
Mr Buffett Mr Monger it seems like you found the Sweet Spot between being too
conservative and too aggressive as investors do you ever make bad investment
decisions because of your emotions and what do you do to try to keep that from
happening well we make
bad investment decisions uh plenty of times I make more than Charlie
because I'm I like to think it's because I make more decisions but probably more batting average is worse but
I can't recall anytime
in the history of Berkshire the women in an emotional decision that
I know the movie had Jamie Lee in there but that that was for Laughs
I mean Jamie Jamie Lee she's good but she's not
good enough to get me or Charlie to make an emotional decision
laughs Charlie
I'm sure you have something to add on that
well it's a different movie than it is shown in most corporate meetings [Laughter]
personal decision no I don't know that's in business we're talking about
yeah no you don't want to be a no emotion person in all of your life but you you definitely want to be a no more
no emotion person to making an investment or business decision that uh you can argue that that uh uh
with it we probably I would say that we
've made an emotional decision perhaps and when a manager has been with us for
some period and we haven't we've
we we've ignored the fact that perhaps they weren't quite what
they were earlier but our businesses are so good that they they've run better sometimes
uh when uh well I've talked about
West Coast for example the wonderful Louisville it ran on
it ran on automatic pilot for a while but I don't think we suffered to buy it but you can argue
that if Charlie and I hadn't liked Louie as much as we did we might have spotted
a little bit early but I don't think it made any difference in the results would you would you agree with that Charlie
yeah yeah we totally with it and
I'm glad we behaved the way we did it Wesco by the way we bought the thing for
a few tens of millions and it became worth two or three billion
yeah that wasn't common in the Savings and Loan businesses you may have noticed they want they really went crazy in that
industry and and we had a wonderful guy in Louis and we didn't go crazy yeah we didn't go crazy
dumb okay Becky
this question comes from Ben Knoll in Minneapolis he says he's a Berkshire shareholder of three decades and he's
attended many Berkshire meetings he's here again this year and this is addressed to Ajit and Greg he says last
year I asked you about how Geico and BNSF appeared to lose ground to their leading competitors Geico on telematics
and BNSF on Precision scheduled railroading Ajit you responded by saying how you expected Geico to make progress
in a year or two Greg you spoke about your pride in BNSF but you didn't directly address the threat of precision
scheduled railroading will each of you please provide perspective on these competitive challenges and our company's
strategies to address them let me
in terms of Geico and telematics let me make the observation that Geico has
certainly taken the ball by the horns and has made rapid strides in terms of trying to bridge the gap
in terms of telematics and its competitors they have now reached a point where on
all new business close to 90 percent is has a telematics input to to the
pricing decision unfortunately less than half of that is being taken up by the policyholders the
other point I want to make is even though we have made improvements in in terms of Bridging the Gap on telematics
we still haven't started to realize the true benefit and the real culprit or the
bottleneck is technology Geico's technology needs a lot more work than I thought it did
it has more than 500 actually more than 600 Legacy systems that don't really talk to each other and we're trying to
compress them to no more than 15 16 systems that all talk to each other
that's a Monumental Challenge and because of that even though we have made improvements in telematics we still have
a long way to go because of Technology [Music]
because of that and because of the whole issue more broadly in terms of matching rate to risk Geico is still work in
progress I don't know if you any of you had a chance to look at the first quarter results but Geico has had a very
good first quarter coming in at a combined ratio of 93 and change which means a margin of six and change
uh even though that's very good it's not something we can take to the bank because they're two unusual items that
contributed to it firstly we've had what is called prior year Reserve releases
we've reduced results for the previous years and that contributed to it and secondly every year the first quarter
tends to be a seasonally good quote of auto insurance writers
so if you just for those two factors my guess is the end of the year Geico will end up with a combined ratio just south
of 100 as opposed to the Target they're shooting for 96.
I hope they reached the target of 96 by the end of next year
and but instead of getting too excited about it I think it's important to realize that even if we reach 96 it will
come at the expense of having lost policy holder there is a trade-off between
profitability and growth and clearly we have we're going to emphasize
profitability and not growth and that will come at the expense of policyholder so it will not be until two years from
now that we'll be back on track fighting the battles on both the profitability and growth front
Greg yep moving to BNSF
I'll start again by expressing great pride in the BNSF team we have an exceptional group of led by Katie and
and her managers that uh show up every day to do great work on on the railroad
at the same time they would be the first to acknowledge there's more to be done there uh the specific reference to
Precision scheduled railroading the other large railroad Class A uh
r...

[TEXT_END]

Now, answer immediately and only in json format.<|end_of_turn|>GPT4 Correct Assistant:
Tokens generated: 62.00
Total Time Taken: 5.49 seconds
Tokens per Second: 11.29
 {
    "names": [
        "Judd",
        "Laura Zaberski",
        "Warren Buffett",
        "Charlie Munger",
        "Dan Calkins"
    ],
    "organisations": [
        "Jazz Wares",
        "Benjamin Moore",
        "Berkshire Hathaway"
    ]
}
<s>GPT4 Correct User: Extract names and organizations from the provided text, and return them in JSON format. Use the following schema:

{
    "type": "object",
    "properties": {
        "names": {
            "type": "array",
            "items": {
                "type": "string"
            }
        },
        "organisations": {
            "type": "array",
            "items": {
                "type": "string"
            }
        }
    },
    "required": [
        "names",
        "organisations"
    ]
}

Here's an example of a response in JSON format:

{
    "names": [
        "sample_string_1",
        "sample_string_2"
    ],
    "organisations": [
        "sample_string_1",
        "sample_string_2"
    ]
}

Do not include anything that is not explicitly mentioned in the text. Analyse the text carefully to ensure all requested data is extracted. Include each name and organization only once. Adhere strictly to the response format without adding extra spaces or text.

[TEXT_START]

...ailroads in the in the U.S follow that and including the two in Canada we're
well aware of what they're doing and and obviously pay close attention to their
operating Matrix and our team strives every day to be more efficient obviously I would say we balance it with the needs
of our customers if I look back to pre-2022 so we look at the three-year
period of 2019 2020 2021 the BNSF team
made significant progress on their efficiencies and and and delivering
overall value back to the shareholders and and to their customers and at the same time maintaining a very safe
railroad for our employees so we're making excellent progress that didn't stop last year they made great progress
again the reality in 2022 as we did go through a period of time where we had to
call it reset the railroad we came out of the pandemic there were the supply challenges we had certain other issue
labor issues and other things going on at the port and the reality is our team prioritized getting the railroad back in
place for the long term not a short-term focus on hitting certain operating metrics in in
2022 we're well aware of where we were relative to those metrics but the real Focus was to get the railroad reset in a
in a safe manner such that we could deliver long-term value and long-term service to our customers and and that's
really what we'll continue to see with that team they'll be continual progress there'll be years where it's not as
quickly or even we go backwards but over the long term we'll be very uh we'll see
exceptional results from from that team and and couldn't be more proud that we have that asset thank you
I wouldn't I would just [Applause] well he deserves but both of them
deserve Applause the I would like to add one thing [Applause]
[Music] [Applause] the
uh at Geico .com's uh
was ajith's Choice my choice go back to guy at Geico
to work on the problem of matching rate to risk which is what
insurance is all about and uh he arrived
with exquisite timing right before the pandemic broke out all kinds of things changed but Todd is
doing a wonderful job at Geico and uh and he
works closely uh with the Jeep but because he saw his home and all Morgan comes back
here and we we get together on the weekend sometimes too so uh uh that's been a
a remarkable com accomplishment under difficult circumstances and he's not all the way home but he's a he's made a very
very big change uh in multiple ways Geico and then one
other thing I would like to mention there have been a lot of public companies created in the last decade
thereabouts in insurance and uh there's none of them
that we would like to own and and they always started out in their perspective saying this is
a tech company not an insurance company of course we're of course they're in a tech company where everybody's
whether they're in insurance or a lot of other places are using the facility but
you still have to properly match rate to risk and uh uh that they invariably have
reported the huge losses they've eaten up capital but there's been one company that nobody
has generally heard of there's only been one that I know of company started in the last 10 years
that has been a overwhelming success and that's a company that a g
and four people who join with them set to develop a new business
it's called Berkshire Hathaway specialty it now has uh what's the float a g coming up to 12
billion yeah we've we've built more slope than probably all these companies combined
we've now it it's cost us essentially nothing in terms of an underwriting loss
the four people have turned in I don't know 1500 around the world we we took on the whole industry and we brought
some unique how and the four people that came and
now have like I said 1500 or so worldwide and we brought capital and we
brought okay capabilities that really only Berkshire
could Supply so it was the it was the combination of of...

[TEXT_END]

Now, answer immediately and only in json format.<|end_of_turn|>GPT4 Correct Assistant:
Tokens generated: 92.75
Total Time Taken: 5.65 seconds
Tokens per Second: 16.43
 {
    "names": [
        "Buffett",
        "Munger",
        "Greg Abel",
        "Ajit Jane",
        "Ron Olson",
        "Howard Buffett",
        "Ann widblad",
        "Bobby kodek",
        "John Rogers"
    ],
    "organisations": [
        "Berkshire",
        "Activision Blizzard",
        "Apple",
        "Paramount Global",
        "Northern Trust"
    ]
}
Tokens generated: 51.25
Total Time Taken: 2.94 seconds
Tokens per Second: 17.44
 {
    "names": [
        "Randy Jeffs",
        "Nirav Patel",
        "Ben Knoll"
    ],
    "organisations": [
        "Silicon Valley Bank",
        "Berkshire",
        "Geico",
        "BNSF"
    ]
}
Tokens generated: 76.00
Total Time Taken: 5.95 seconds
Tokens per Second: 12.77
 {
    "names": [
        "John Rogers",
        "Warren Buffett",
        "Charlie Munger",
        "Judd Zaberski",
        "Laura Zaberski"
    ],
    "organisations": [
        "Aerial Investments",
        "Berkshire Hathaway",
        "Allegheny",
        "Jazz Wares",
        "Kelly Toys"
    ]
}
Tokens generated: 139.00
Total Time Taken: 6.80 seconds
Tokens per Second: 20.43
 {
    "names": [
        "Becky",
        "Mike Santoli",
        "Warren Buffett",
        "Charlie Munger",
        "Greg Abel",
        "Ajit Jain",
        "Debbie Pasonic",
        "Bill Murray",
        "Jane Frazier",
        "Ruth Porat"
    ],
    "organisations": [
        "Berkshire Hathaway",
        "Apple",
        "Berkshire's shareholders",
        "Alphabet",
        "Berkshire Hathaway",
        "Berkshire Hathaway",
        "Berkshire Hathaway",
        "Berkshire Hathaway",
        "Berkshire Hathaway",
        "Alphabet"
    ]
}
Tokens generated: 49.75
Total Time Taken: 1.57 seconds
Tokens per Second: 31.72
 {
    "names": [
        "BNSF",
        "Geico",
        "Berkshire Hathaway specialty"
    ],
    "organisations": [
        "BNSF",
        "Geico",
        "Berkshire Hathaway specialty"
    ]
}
Tokens generated: 154.50
Total Time Taken: 7.11 seconds
Tokens per Second: 21.73
 {
    "names": [
        "Mike",
        "David Modematan",
        "Warren Buffett",
        "Charlie Munger",
        "Howard Buffett",
        "Suzy Buffett",
        "Steve Burke",
        "Ken Chenault",
        "Chris Davis",
        "Sue Decker",
        "Charlotte Diamond",
        "Tom Murphy Jr",
        "Ron Olsen",
        "Wally Weiss",
        "Merle Whitburn",
        "Melissa Shapiro Shapiro"
    ],
    "organisations": [
        "Insurance",
        "Berkshire Hathaway",
        "Evercore",
        "Travelers",
        "Progressive",
        "Chubb",
        "Geico",
        "Allstate"
    ]
}
Total number of requests: 8
Aggregation complete! The aggregated data has been written to '/content/drive/My Drive/data_extraction/outputs/output.json'.
Error rate is 0.0

Extracting Structured Data from Unstructured Text with LLMs¶

Motivation¶

Example output¶

Why should you read this notebook?¶

Source Code¶

Pre-requisites¶

Get Started¶

Install dependencies¶

If using Google Drive to store input/output files¶

Import libraries¶

If using a private model on Hugging Face¶

Configuration¶

Utility Functions¶

Prompt¶

Aggregate (JSON)¶

`json_schema.json`¶

Aggregate (YAML)¶

`yaml_schema.yaml`¶

Chat completion request¶

Extract¶

Extracting Structured Data from Unstructured Text with LLMs¶

Motivation¶

Example output¶

Why should you read this notebook?¶

Source Code¶

Pre-requisites¶

Get Started¶

Install dependencies¶

If using Google Drive to store input/output files¶

Import libraries¶

If using a private model on Hugging Face¶

Configuration¶

Utility Functions¶

Prompt¶

Aggregate (JSON)¶

json_schema.json¶

Aggregate (YAML)¶

yaml_schema.yaml¶

Chat completion request¶

Extract¶

`json_schema.json`¶

`yaml_schema.yaml`¶