▶fflb0108.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"fflb0108.png","Invoice":"https://storage.googleapis.com/llm-test-files/8f8110a10725df4b07694d4a57d4d1e9ccd5b5ae1ed591152a8e665876297311"} |
System Output |
{"Name":"fflb0108.png_5do","BuyerName":"Lorillard Research Center","CustomerId":"cust_dnwalc","SenderName":"Microbiological Associates Inc.","TotalAmount":5450,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"5,450.00\",\"5,450.00\",\"$5,450.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard Research Center
Actual: Lorillard Research Center
|
?Total Correct? |
✅
Correct
Expected: 5450
Actual: 5450
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_dnwalc
Actual: cust_dnwalc
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Microbiological Associates Inc.
Actual: Microbiological Associates Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/8f8110a10725df4b07694d4a57d4d1e9ccd5b5ae1ed591152a8e665876297311) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Microbiological Associates Inc.",
"BuyerName": "Lorillard Research Center",
"TotalAmount": 5450,
"NeedsApproval": true,
"CustomerId": "cust_dnwalc",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶ffpm0094.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"ffpm0094.png","Invoice":"https://storage.googleapis.com/llm-test-files/4953a272053d045bec9058625ceba9a30b5d71acffabf6b372863fe1531c8169"} |
System Output |
{"Name":"ffpm0094.png_Aj3","BuyerName":"Duke University","CustomerId":null,"SenderName":"Chicago Marriott Downtown","TotalAmount":1533.54,"FilterResult":"Valid","ManagerEmail":null,"NeedsApproval":true,"BillingAmounts":"[\"$2.50\",\"$27.50\",\"$3.00\",\"$18.00\",\"$30.95\",\"$619.00\",\"$600.00\",\"$45.00\",\"$664.50\",\"$600.00\",\"$45.00\",\"$126.26\",\"$11.05\",\"$64.79\",\"$3.94\",\"$18.00\",\"$1,533.54\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Duke University
Actual: Duke University
|
?Total Correct? |
✅
Correct
Expected: 1533.54
Actual: 1533.54
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: null
Actual: null
|
?Manager Email Correct? |
✅
Correct
Expected: null
Actual: null
|
?Sender Name Correct? |
✅
Correct
Expected: Chicago Marriott Downtown
Actual: Chicago Marriott Downtown
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/4953a272053d045bec9058625ceba9a30b5d71acffabf6b372863fe1531c8169) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Chicago Marriott Downtown",
"BuyerName": "Duke University",
"TotalAmount": 1533.54,
"NeedsApproval": true
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶fkwb0046.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"fkwb0046.png","Invoice":"https://storage.googleapis.com/llm-test-files/c22cab79c1fe8ba5d455f46e5577dc75db7e84bd1604ee21d8c6f9669c293c54"} |
System Output |
{"Name":"fkwb0046.png_ULS","BuyerName":"The Tobacco Institute Inc","CustomerId":"cust_nxfkpn","SenderName":"Xerox Corporation","TotalAmount":571.94,"FilterResult":"Valid: Legible Xerox invoice; no tampering or malicious/misleading content observed.","ManagerEmail":"[email protected]","NeedsApproval":false,"BillingAmounts":"[539.57, 32.37, 571.94]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: THE TOBACCO INSTITUTE INC
Actual: The Tobacco Institute Inc
|
?Total Correct? |
✅
Correct
Expected: 571.94
Actual: 571.94
|
?Approval Correct? |
✅
Correct
Expected: false
Actual: false
|
?Customer ID Correct? |
✅
Correct
Expected: cust_nxfkpn
Actual: cust_nxfkpn
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: XEROX CORPORATION
Actual: Xerox Corporation
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/c22cab79c1fe8ba5d455f46e5577dc75db7e84bd1604ee21d8c6f9669c293c54) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "XEROX CORPORATION",
"BuyerName": "THE TOBACCO INSTITUTE INC",
"TotalAmount": 571.94,
"NeedsApproval": false,
"CustomerId": "cust_nxfkpn",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶fmly0054.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"fmly0054.png","Invoice":"https://storage.googleapis.com/llm-test-files/f7f05c7ccf5069f6fc2554b00a16ad7465ddf690634c7855e4672bcec29be0b7"} |
System Output |
{"Name":"fmly0054.png_mL6","BuyerName":"Brown & Williamson Tobacco Co.","CustomerId":null,"SenderName":"Market Facts, Inc.","TotalAmount":12600,"FilterResult":"Valid","ManagerEmail":null,"NeedsApproval":true,"BillingAmounts":"[\"$37,800.00\",\"($25,200.00)\",\"$12,600.00\",\"$12,600.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Brown & Williamson Tobacco Co.
Actual: Brown & Williamson Tobacco Co.
|
?Total Correct? |
✅
Correct
Expected: 12600
Actual: 12600
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: null
Actual: null
|
?Manager Email Correct? |
✅
Correct
Expected: null
Actual: null
|
?Sender Name Correct? |
✅
Correct
Expected: Market Facts, Inc.
Actual: Market Facts, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/f7f05c7ccf5069f6fc2554b00a16ad7465ddf690634c7855e4672bcec29be0b7) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Market Facts, Inc.",
"BuyerName": "Brown & Williamson Tobacco Co.",
"TotalAmount": 12600,
"NeedsApproval": true
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶fqwm0045.png |
valid | ❌ | | | | 0.00 | |
Task Input |
{"Name":"fqwm0045.png","Invoice":"https://storage.googleapis.com/llm-test-files/b60a53877e8a9003e5974409773c0845ddbdb494c5299ec28ce0f3b751860000"} |
System Output |
No output available |
Explanation |
System returned nothing, but expected output: {
"SenderName": "Fannon-Luiers Associates Inc.",
"BuyerName": "Tobacco Institute",
"TotalAmount": 212.98,
"NeedsApproval": true,
"CustomerId": "cust_basfwy",
"ManagerEmail": "[email protected]"
}
|
?Valid? |
valid |
?Validation prediction |
invalid |
?Input validation correct? |
❌ |
?image |
 _ Document Processing Benchmark_files/b60a53877e8a9003e5974409773c0845ddbdb494c5299ec28ce0f3b751860000) |
?Weighted score |
0.00 |
?expectedOutput |
{
"SenderName": "Fannon-Luiers Associates Inc.",
"BuyerName": "Tobacco Institute",
"TotalAmount": 212.98,
"NeedsApproval": true,
"CustomerId": "cust_basfwy",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶fsgb0114.png |
valid | ✅ | ❌ | ✅ | ✅ | 0.83 | |
Task Input |
{"Name":"fsgb0114.png","Invoice":"https://storage.googleapis.com/llm-test-files/0c54d3c453a56f952d7ab32d653e30c5073a35c2798dd62085e4a7c882f6735a"} |
System Output |
{"Name":"fsgb0114.png_cba","BuyerName":"Legislative Intent Service","CustomerId":null,"SenderName":"Littler, Mendelson, Fastiff, Tichy & Mathiason","TotalAmount":852.8,"FilterResult":"Valid invoice: legible; no tampering or malicious content detected.","ManagerEmail":null,"NeedsApproval":false,"BillingAmounts":"[\"800.00\",\"52.80\",\"852.80\"]"} |
Explanation |
SenderName and BuyerName are swapped in the actual output.
|
?Valid? |
valid |
?Buyer Correct? |
❌
Incorrect
Expected: Littler, Mendelson, Fastiff, Tichy & Mathiason
Actual: Legislative Intent Service
|
?Total Correct? |
✅
Correct
Expected: 852.8
Actual: 852.8
|
?Approval Correct? |
✅
Correct
Expected: false
Actual: false
|
?Customer ID Correct? |
✅
Correct
Expected: null
Actual: null
|
?Manager Email Correct? |
✅
Correct
Expected: null
Actual: null
|
?Sender Name Correct? |
❌
Incorrect
Expected: LEGISLATIVE INTENT SERVICE
Actual: Littler, Mendelson, Fastiff, Tichy & Mathiason
|
?Input validation correct? |
✅ |
?Extractions correct? |
❌ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/0c54d3c453a56f952d7ab32d653e30c5073a35c2798dd62085e4a7c882f6735a) |
?Partial Extraction Match? |
|
?Weighted score |
0.83 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "LEGISLATIVE INTENT SERVICE",
"BuyerName": "Littler, Mendelson, Fastiff, Tichy & Mathiason",
"TotalAmount": 852.8,
"NeedsApproval": false
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶ftkk0067.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | buyerName:
Expected: "Mark Whatford Library, Philip Morris"
Actual: "Philip Morris"
|
Task Input |
{"Name":"ftkk0067.png","Invoice":"https://storage.googleapis.com/llm-test-files/57049a248c75e7855bcd7ba0406e3a47de7a85270f16f1fec75be0262412f481"} |
System Output |
{"Name":"ftkk0067.png_lMR","BuyerName":"Philip Morris","CustomerId":null,"SenderName":"Corporate Language Services","TotalAmount":475,"FilterResult":"Valid","ManagerEmail":null,"NeedsApproval":false,"BillingAmounts":"[\"$ 475.00\",\"$ 475.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Mark Whatford Library, Philip Morris
Actual: Philip Morris
|
?Total Correct? |
✅
Correct
Expected: 475
Actual: 475
|
?Approval Correct? |
✅
Correct
Expected: false
Actual: false
|
?Customer ID Correct? |
✅
Correct
Expected: null
Actual: null
|
?Manager Email Correct? |
✅
Correct
Expected: null
Actual: null
|
?Sender Name Correct? |
✅
Correct
Expected: Corporate Language Services
Actual: Corporate Language Services
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/57049a248c75e7855bcd7ba0406e3a47de7a85270f16f1fec75be0262412f481) |
?Partial Extraction Match? |
buyerName:
Expected: "Mark Whatford Library, Philip Morris"
Actual: "Philip Morris"
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Corporate Language Services",
"BuyerName": "Mark Whatford Library, Philip Morris",
"TotalAmount": 475,
"NeedsApproval": false
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶fzng0129.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"fzng0129.png","Invoice":"https://storage.googleapis.com/llm-test-files/93625da6758f48d8f8581bdb62059af8f4c68a48bf1809af69699729041a5662"} |
System Output |
{"Name":"fzng0129.png_Y-6","BuyerName":"Lorillard, Inc.","CustomerId":null,"SenderName":"Borriston Laboratories, Inc.","TotalAmount":1610,"FilterResult":"Valid: Invoice appears authentic, legible, and safe to process.","ManagerEmail":null,"NeedsApproval":true,"BillingAmounts":"[1610.00, 230.00, 1150.00, 460.00, 230.00, 230.00]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard, Inc.
Actual: Lorillard, Inc.
|
?Total Correct? |
✅
Correct
Expected: 1610
Actual: 1610
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: null
Actual: null
|
?Manager Email Correct? |
✅
Correct
Expected: null
Actual: null
|
?Sender Name Correct? |
✅
Correct
Expected: Borriston Laboratories, Inc.
Actual: Borriston Laboratories, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/93625da6758f48d8f8581bdb62059af8f4c68a48bf1809af69699729041a5662) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Borriston Laboratories, Inc.",
"BuyerName": "Lorillard, Inc.",
"TotalAmount": 1610,
"NeedsApproval": true
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶ggbh0038.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"ggbh0038.png","Invoice":"https://storage.googleapis.com/llm-test-files/1141519f40bc6de2b14d872a311aa11cc5c6e6e7770744d5724700a8bebe3e16"} |
System Output |
{"Name":"ggbh0038.png_SoP","BuyerName":"Tobacco Institute","CustomerId":"cust_basfwy","SenderName":"Fannon-Luers Associates Inc.","TotalAmount":283.75,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[283.75, 0.00, 283.75]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Tobacco Institute
Actual: Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 283.75
Actual: 283.75
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_basfwy
Actual: cust_basfwy
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Fannon-Luers Associates Inc.
Actual: Fannon-Luers Associates Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/1141519f40bc6de2b14d872a311aa11cc5c6e6e7770744d5724700a8bebe3e16) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Fannon-Luers Associates Inc.",
"BuyerName": "Tobacco Institute",
"TotalAmount": 283.75,
"NeedsApproval": true,
"CustomerId": "cust_basfwy",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶ghyx0215.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"ghyx0215.png","Invoice":"https://storage.googleapis.com/llm-test-files/a04e1477094c761769911b93598fbfec3277e60cb856cff8f28f730923ccda3f"} |
System Output |
{"Name":"ghyx0215.png_XWz","BuyerName":"The Council For Tobacco Research-USA, Inc.","CustomerId":null,"SenderName":"ICF Incorporated","TotalAmount":3724.58,"FilterResult":"Valid: Legible, authentic-looking invoice; no tampering or malicious content detected.","ManagerEmail":null,"NeedsApproval":true,"BillingAmounts":"[\"$100.00\",\"$3,700.00\",\"$24.58\",\"$3,724.58\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: The Council For Tobacco Research-USA, Inc.
Actual: The Council For Tobacco Research-USA, Inc.
|
?Total Correct? |
✅
Correct
Expected: 3724.58
Actual: 3724.58
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: null
Actual: null
|
?Manager Email Correct? |
✅
Correct
Expected: null
Actual: null
|
?Sender Name Correct? |
✅
Correct
Expected: ICF Incorporated
Actual: ICF Incorporated
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/a04e1477094c761769911b93598fbfec3277e60cb856cff8f28f730923ccda3f) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "ICF Incorporated",
"BuyerName": "The Council For Tobacco Research-USA, Inc.",
"TotalAmount": 3724.58,
"NeedsApproval": true
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶glyg0129.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | senderName:
Expected: "Borrison Laboratories, Inc."
Actual: "Borriston Laboratories, Inc."
|
Task Input |
{"Name":"glyg0129.png","Invoice":"https://storage.googleapis.com/llm-test-files/9645153df5c58924a6eefcccf5acdc3de57d819c76ca4e29d69a17c1382bba5c"} |
System Output |
{"Name":"glyg0129.png_ZOS","BuyerName":"Lorillard Inc.","CustomerId":null,"SenderName":"Borriston Laboratories, Inc.","TotalAmount":7650,"FilterResult":"Valid","ManagerEmail":null,"NeedsApproval":true,"BillingAmounts":"[\"$7,650.00\",\"$2,550.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard Inc.
Actual: Lorillard Inc.
|
?Total Correct? |
✅
Correct
Expected: 7650
Actual: 7650
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: null
Actual: null
|
?Manager Email Correct? |
✅
Correct
Expected: null
Actual: null
|
?Sender Name Correct? |
✅
Correct
Expected: Borrison Laboratories, Inc.
Actual: Borriston Laboratories, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/9645153df5c58924a6eefcccf5acdc3de57d819c76ca4e29d69a17c1382bba5c) |
?Partial Extraction Match? |
senderName:
Expected: "Borrison Laboratories, Inc."
Actual: "Borriston Laboratories, Inc."
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Borrison Laboratories, Inc.",
"BuyerName": "Lorillard Inc.",
"TotalAmount": 7650,
"NeedsApproval": true
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶gmfh0118.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | senderName:
Expected: "Petersen Publishing Co."
Actual: "Petersen Publishing Company"
|
Task Input |
{"Name":"gmfh0118.png","Invoice":"https://storage.googleapis.com/llm-test-files/6d6aa22dbd14cc2be7fc0cd864b7ae8bf58a898899a1044bee305216564cef3b"} |
System Output |
{"Name":"gmfh0118.png_ZS9","BuyerName":"Lorillard Media Services","CustomerId":null,"SenderName":"Petersen Publishing Company","TotalAmount":2851.75,"FilterResult":"Valid","ManagerEmail":null,"NeedsApproval":true,"BillingAmounts":"[3355.00, -503.25, 2851.75]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard Media Services
Actual: Lorillard Media Services
|
?Total Correct? |
✅
Correct
Expected: 2851.75
Actual: 2851.75
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: null
Actual: null
|
?Manager Email Correct? |
✅
Correct
Expected: null
Actual: null
|
?Sender Name Correct? |
✅
Correct
Expected: Petersen Publishing Co.
Actual: Petersen Publishing Company
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/6d6aa22dbd14cc2be7fc0cd864b7ae8bf58a898899a1044bee305216564cef3b) |
?Partial Extraction Match? |
senderName:
Expected: "Petersen Publishing Co."
Actual: "Petersen Publishing Company"
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Petersen Publishing Co.",
"BuyerName": "Lorillard Media Services",
"TotalAmount": 2851.75,
"NeedsApproval": true
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶gmlv0002.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"gmlv0002.png","Invoice":"https://storage.googleapis.com/llm-test-files/9e00cb89b1dd911102077b163dbc853e19c5e12e5757885b1cfcf5fb831b8265"} |
System Output |
{"Name":"gmlv0002.png_1Es","BuyerName":"Philip Morris U.S.A.","CustomerId":null,"SenderName":"Toppmeyer Research, Inc.","TotalAmount":5606.25,"FilterResult":"Valid","ManagerEmail":null,"NeedsApproval":true,"BillingAmounts":"[\"$4,500.00\",\"$1,106.25\",\"$5,606.25\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Philip Morris U.S.A.
Actual: Philip Morris U.S.A.
|
?Total Correct? |
✅
Correct
Expected: 5606.25
Actual: 5606.25
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: null
Actual: null
|
?Manager Email Correct? |
✅
Correct
Expected: null
Actual: null
|
?Sender Name Correct? |
✅
Correct
Expected: Toppmeyer Research, Inc.
Actual: Toppmeyer Research, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/9e00cb89b1dd911102077b163dbc853e19c5e12e5757885b1cfcf5fb831b8265) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Toppmeyer Research, Inc.",
"BuyerName": "Philip Morris U.S.A.",
"TotalAmount": 5606.25,
"NeedsApproval": true
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶gnfb0105.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"gnfb0105.png","Invoice":"https://storage.googleapis.com/llm-test-files/3057b7d27d1ea8870439a3a367411e5b930187952295b0dc057e37eb0bad9e02"} |
System Output |
{"Name":"gnfb0105.png_V7A","BuyerName":"Lorillard Company","CustomerId":"cust_ybwqzc","SenderName":"Metropolitan Sunday Newspapers, Inc.","TotalAmount":27162.6,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"16,518.00\",\"14,040.30\",\"3,178.00\",\"2,701.30\",\"12,260.00\",\"10,421.00\",\"31,956.00\",\"27,162.60\",\"27,162.60\",\"14,040.30\",\"10,421.00\",\"2,701.30\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard Company
Actual: Lorillard Company
|
?Total Correct? |
✅
Correct
Expected: 27162.6
Actual: 27162.6
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_ybwqzc
Actual: cust_ybwqzc
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Metropolitan Sunday Newspapers, Inc.
Actual: Metropolitan Sunday Newspapers, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/3057b7d27d1ea8870439a3a367411e5b930187952295b0dc057e37eb0bad9e02) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Metropolitan Sunday Newspapers, Inc.",
"BuyerName": "Lorillard Company",
"TotalAmount": 27162.6,
"NeedsApproval": true,
"CustomerId": "cust_ybwqzc",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶gxlh0027.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"gxlh0027.png","Invoice":"https://storage.googleapis.com/llm-test-files/b394470ec8abb2314c5aae3514637bbff76c3efe009623fded69cc8120aad68b"} |
System Output |
{"Name":"gxlh0027.png_at0","BuyerName":"The Tobacco Institute","CustomerId":"cust_rnbufp","SenderName":"Peake Printers, Inc.","TotalAmount":1905.88,"FilterResult":"Valid: Legible invoice; no tampering or malicious content observed.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"1798.000\",\"1798.00\",\"1798.00\",\"107.88\",\"1905.88\",\"1905.88\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: The Tobacco Institute
Actual: The Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 1905.88
Actual: 1905.88
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_rnbufp
Actual: cust_rnbufp
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Peake Printers, Inc.
Actual: Peake Printers, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/b394470ec8abb2314c5aae3514637bbff76c3efe009623fded69cc8120aad68b) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Peake Printers, Inc.",
"BuyerName": "The Tobacco Institute",
"TotalAmount": 1905.88,
"NeedsApproval": true,
"CustomerId": "cust_rnbufp",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶hmxd0060.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"hmxd0060.png","Invoice":"https://storage.googleapis.com/llm-test-files/e827779852a4bba47259c7ba1971fb1deb5d43460c0270221b22617e0cc6855c"} |
System Output |
{"Name":"hmxd0060.png_xhd","BuyerName":"GCI","CustomerId":null,"SenderName":"1/33 Productions, Inc.","TotalAmount":99825.5,"FilterResult":"Valid","ManagerEmail":null,"NeedsApproval":true,"BillingAmounts":"[\"$199,651.00\",\"$99,825.50\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: GCI
Actual: GCI
|
?Total Correct? |
✅
Correct
Expected: 99825.5
Actual: 99825.5
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: null
Actual: null
|
?Manager Email Correct? |
✅
Correct
Expected: null
Actual: null
|
?Sender Name Correct? |
✅
Correct
Expected: 1/33 Productions Inc
Actual: 1/33 Productions, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/e827779852a4bba47259c7ba1971fb1deb5d43460c0270221b22617e0cc6855c) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "1/33 Productions Inc",
"BuyerName": "GCI",
"TotalAmount": 99825.5,
"NeedsApproval": true
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶hnjb0108.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"hnjb0108.png","Invoice":"https://storage.googleapis.com/llm-test-files/3a91f7b9311ef4dbaa0a27708d0959f175a03df95ad555334eaf3525532bb62f"} |
System Output |
{"Name":"hnjb0108.png_v8d","BuyerName":"Lorillard Research Center","CustomerId":"cust_dnwalc","SenderName":"Microbiological Associates Inc.","TotalAmount":5237.72,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[5237.72]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard Research Center
Actual: Lorillard Research Center
|
?Total Correct? |
✅
Correct
Expected: 5237.72
Actual: 5237.72
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_dnwalc
Actual: cust_dnwalc
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Microbiological Associates Inc.
Actual: Microbiological Associates Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/3a91f7b9311ef4dbaa0a27708d0959f175a03df95ad555334eaf3525532bb62f) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Microbiological Associates Inc.",
"BuyerName": "Lorillard Research Center",
"TotalAmount": 5237.72,
"NeedsApproval": true,
"CustomerId": "cust_dnwalc",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶hqpg0039.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | senderName:
Expected: "Don Richard Associates"
Actual: "Don Richard Associates of Washington D.C."
|
Task Input |
{"Name":"hqpg0039.png","Invoice":"https://storage.googleapis.com/llm-test-files/c332ea883dd79a0a2127bcac837013f13895a83c8ebd9265c413dd29b7a30dea"} |
System Output |
{"Name":"hqpg0039.png_Bx9","BuyerName":"Tobacco Institute","CustomerId":"cust_gmrpbc","SenderName":"Don Richard Associates of Washington D.C.","TotalAmount":507,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":false,"BillingAmounts":"[\"$13.50\",\"$405.75\",\"$20.25\",\"$101.25\",\"$507.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Tobacco Institute
Actual: Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 507
Actual: 507
|
?Approval Correct? |
✅
Correct
Expected: false
Actual: false
|
?Customer ID Correct? |
✅
Correct
Expected: cust_gmrpbc
Actual: cust_gmrpbc
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Don Richard Associates
Actual: Don Richard Associates of Washington D.C.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/c332ea883dd79a0a2127bcac837013f13895a83c8ebd9265c413dd29b7a30dea) |
?Partial Extraction Match? |
senderName:
Expected: "Don Richard Associates"
Actual: "Don Richard Associates of Washington D.C."
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Don Richard Associates",
"BuyerName": "Tobacco Institute",
"TotalAmount": 507,
"NeedsApproval": false,
"CustomerId": "cust_gmrpbc",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶htvh0025.png |
valid | ❌ | | | | 0.00 | |
Task Input |
{"Name":"htvh0025.png","Invoice":"https://storage.googleapis.com/llm-test-files/f20a32e883795d39fbc2b2b2922cba19faada64a5288beeedd5ee6f4580bbc8b"} |
System Output |
No output available |
Explanation |
System returned nothing, but expected output: {
"SenderName": "Philip Morris U.S.A.",
"BuyerName": "Gary M. Reynolds & Assoc., Inc.",
"TotalAmount": 5000,
"NeedsApproval": true,
"CustomerId": "cust_ygirss",
"ManagerEmail": "[email protected]"
}
|
?Valid? |
valid |
?Validation prediction |
invalid |
?Input validation correct? |
❌ |
?image |
 _ Document Processing Benchmark_files/f20a32e883795d39fbc2b2b2922cba19faada64a5288beeedd5ee6f4580bbc8b) |
?Weighted score |
0.00 |
?expectedOutput |
{
"SenderName": "Philip Morris U.S.A.",
"BuyerName": "Gary M. Reynolds & Assoc., Inc.",
"TotalAmount": 5000,
"NeedsApproval": true,
"CustomerId": "cust_ygirss",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶hzpn0004.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"hzpn0004.png","Invoice":"https://storage.googleapis.com/llm-test-files/3bb2c2eb32aef2b5ae810700dbf133fa21afff6d5679e8707f84357cb0d6f66c"} |
System Output |
{"Name":"hzpn0004.png_S1L","BuyerName":"Philip Morris USA","CustomerId":"cust_rivhlq","SenderName":"John Schreiber Group, Inc.","TotalAmount":626464,"FilterResult":"Valid: Legible, authentic-looking invoice; no tampering or malicious content observed.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"$626,464.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Philip Morris USA
Actual: Philip Morris USA
|
?Total Correct? |
✅
Correct
Expected: 626464
Actual: 626464
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_rivhlq
Actual: cust_rivhlq
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: John Schreiber Group, Inc.
Actual: John Schreiber Group, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/3bb2c2eb32aef2b5ae810700dbf133fa21afff6d5679e8707f84357cb0d6f66c) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "John Schreiber Group, Inc.",
"BuyerName": "Philip Morris USA",
"TotalAmount": 626464,
"NeedsApproval": true,
"CustomerId": "cust_rivhlq",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶hzpx0071.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"hzpx0071.png","Invoice":"https://storage.googleapis.com/llm-test-files/dd50f5caaa4228140e0f3299efd9ca93a6b2e32905a195f1b99aa9775e5b12b2"} |
System Output |
{"Name":"hzpx0071.png_QDB","BuyerName":"Lorillard Inc.","CustomerId":"cust_jnmoaa","SenderName":"U.S. Suburban Press, Inc.","TotalAmount":2116.65,"FilterResult":"Valid: Authentic, legible invoice; no tampering or malicious instructions detected.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[2116.65, 2490.18, 373.53, 462.0, 1256.64, 771.54, 11.0, 29.92, 18.37, 384.87, 1068.14, 655.81, 7.85, 10.5]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard Inc.
Actual: Lorillard Inc.
|
?Total Correct? |
✅
Correct
Expected: 2116.65
Actual: 2116.65
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_jnmoaa
Actual: cust_jnmoaa
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: U.S. Suburban Press, Inc.
Actual: U.S. Suburban Press, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/dd50f5caaa4228140e0f3299efd9ca93a6b2e32905a195f1b99aa9775e5b12b2) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "U.S. Suburban Press, Inc.",
"BuyerName": "Lorillard Inc.",
"TotalAmount": 2116.65,
"NeedsApproval": true,
"CustomerId": "cust_jnmoaa",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶img_0.png |
invalid | ✅ | | | | 1.00 | |
Task Input |
{"Name":"img_0.png","Invoice":"https://storage.googleapis.com/llm-test-files/45b60019bb531ff49ed71da0dcd34eb9dd22dfcdd9b052d550cf3a377266ae58"} |
System Output |
No output available |
Explanation |
System didn't return any output and that was expected
|
?Valid? |
invalid |
?Validation prediction |
invalid |
?Input validation correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/45b60019bb531ff49ed71da0dcd34eb9dd22dfcdd9b052d550cf3a377266ae58) |
?Weighted score |
1.00 |
Trace |
Trace |
Status |
evalSuccess |
|
▶img_1.png |
invalid | ✅ | | | | 1.00 | |
Task Input |
{"Name":"img_1.png","Invoice":"https://storage.googleapis.com/llm-test-files/fcd6ce53d8ec41e4c8577d77293413b6ae78aebc5add401170e1e410399bc87f"} |
System Output |
No output available |
Explanation |
System didn't return any output and that was expected
|
?Valid? |
invalid |
?Validation prediction |
invalid |
?Input validation correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/fcd6ce53d8ec41e4c8577d77293413b6ae78aebc5add401170e1e410399bc87f) |
?Weighted score |
1.00 |
Trace |
Trace |
Status |
evalSuccess |
|
▶img_10.png |
invalid | ✅ | | | | 1.00 | |
Task Input |
{"Name":"img_10.png","Invoice":"https://storage.googleapis.com/llm-test-files/c79d4ddf1cdaa02133768b58260f330ce0006e0f73778707abb4d4495b7a58b7"} |
System Output |
No output available |
Explanation |
System didn't return any output and that was expected
|
?Valid? |
invalid |
?Validation prediction |
invalid |
?Input validation correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/c79d4ddf1cdaa02133768b58260f330ce0006e0f73778707abb4d4495b7a58b7) |
?Weighted score |
1.00 |
Trace |
Trace |
Status |
evalSuccess |
|
▶img_2.png |
invalid | ✅ | | | | 1.00 | |
Task Input |
{"Name":"img_2.png","Invoice":"https://storage.googleapis.com/llm-test-files/cc9e41e94ecb12914b8dc50a5b459f7170609ec5ad8145b87b120dcd5f7125c4"} |
System Output |
No output available |
Explanation |
System didn't return any output and that was expected
|
?Valid? |
invalid |
?Validation prediction |
invalid |
?Input validation correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/cc9e41e94ecb12914b8dc50a5b459f7170609ec5ad8145b87b120dcd5f7125c4) |
?Weighted score |
1.00 |
Trace |
Trace |
Status |
evalSuccess |
|
▶img_3.png |
invalid | ✅ | | | | 1.00 | |
Task Input |
{"Name":"img_3.png","Invoice":"https://storage.googleapis.com/llm-test-files/7f334f33914926307340e44184165a8d305a968ad1c73890aa058cf040762504"} |
System Output |
No output available |
Explanation |
System didn't return any output and that was expected
|
?Valid? |
invalid |
?Validation prediction |
invalid |
?Input validation correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/7f334f33914926307340e44184165a8d305a968ad1c73890aa058cf040762504) |
?Weighted score |
1.00 |
Trace |
Trace |
Status |
evalSuccess |
|
▶img_4.png |
invalid | ✅ | | | | 1.00 | |
Task Input |
{"Name":"img_4.png","Invoice":"https://storage.googleapis.com/llm-test-files/16e31ea44bd3a5ddc88fad825f732981aa199043fe589bcf65f95028583e2d2c"} |
System Output |
No output available |
Explanation |
System didn't return any output and that was expected
|
?Valid? |
invalid |
?Validation prediction |
invalid |
?Input validation correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/16e31ea44bd3a5ddc88fad825f732981aa199043fe589bcf65f95028583e2d2c) |
?Weighted score |
1.00 |
Trace |
Trace |
Status |
evalSuccess |
|
▶img_5.png |
invalid | ✅ | | | | 1.00 | |
Task Input |
{"Name":"img_5.png","Invoice":"https://storage.googleapis.com/llm-test-files/9a2517394b8b848aa022fc9d4e9401f465b837df640e6bd03dcfe15f2ab33994"} |
System Output |
No output available |
Explanation |
System didn't return any output and that was expected
|
?Valid? |
invalid |
?Validation prediction |
invalid |
?Input validation correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/9a2517394b8b848aa022fc9d4e9401f465b837df640e6bd03dcfe15f2ab33994) |
?Weighted score |
1.00 |
Trace |
Trace |
Status |
evalSuccess |
|
▶img_6.png |
invalid | ✅ | | | | 1.00 | |
Task Input |
{"Name":"img_6.png","Invoice":"https://storage.googleapis.com/llm-test-files/04f567e8b9cb7326626a744232d65957f7bec57bfe57877dcf44bccdfcf059ca"} |
System Output |
No output available |
Explanation |
System didn't return any output and that was expected
|
?Valid? |
invalid |
?Validation prediction |
invalid |
?Input validation correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/04f567e8b9cb7326626a744232d65957f7bec57bfe57877dcf44bccdfcf059ca) |
?Weighted score |
1.00 |
Trace |
Trace |
Status |
evalSuccess |
|
▶img_7.png |
invalid | ✅ | | | | 1.00 | |
Task Input |
{"Name":"img_7.png","Invoice":"https://storage.googleapis.com/llm-test-files/ae4c3964e995caa5ff31a522d9143097021e1f535388b6aa31f6167fcfe60675"} |
System Output |
No output available |
Explanation |
System didn't return any output and that was expected
|
?Valid? |
invalid |
?Validation prediction |
invalid |
?Input validation correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/ae4c3964e995caa5ff31a522d9143097021e1f535388b6aa31f6167fcfe60675) |
?Weighted score |
1.00 |
Trace |
Trace |
Status |
evalSuccess |
|
▶img_8.png |
invalid | ✅ | | | | 1.00 | |
Task Input |
{"Name":"img_8.png","Invoice":"https://storage.googleapis.com/llm-test-files/6ca8021c838a372e02cbb2971360109f882ce88b0a1ad40ddb70aa1c7a7eec83"} |
System Output |
No output available |
Explanation |
System didn't return any output and that was expected
|
?Valid? |
invalid |
?Validation prediction |
invalid |
?Input validation correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/6ca8021c838a372e02cbb2971360109f882ce88b0a1ad40ddb70aa1c7a7eec83) |
?Weighted score |
1.00 |
Trace |
Trace |
Status |
evalSuccess |
|
▶img_9.png |
invalid | ✅ | | | | 1.00 | |
Task Input |
{"Name":"img_9.png","Invoice":"https://storage.googleapis.com/llm-test-files/8398b5b6c030602d0b87c41745bd8ddcae298b3175d03b7af93dc95837cdf66d"} |
System Output |
No output available |
Explanation |
System didn't return any output and that was expected
|
?Valid? |
invalid |
?Validation prediction |
invalid |
?Input validation correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/8398b5b6c030602d0b87c41745bd8ddcae298b3175d03b7af93dc95837cdf66d) |
?Weighted score |
1.00 |
Trace |
Trace |
Status |
evalSuccess |
|
▶jjlh0144.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"jjlh0144.png","Invoice":"https://storage.googleapis.com/llm-test-files/93acb230cd7ee4e0acec1ccd6b87ecfec7fd4ee00eefed9c877e6cfed64538f0"} |
System Output |
{"Name":"jjlh0144.png_DY4","BuyerName":"The American Tobacco Company","CustomerId":"cust_xpytbw","SenderName":"The Andrew Jergens Company","TotalAmount":20044.8,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"$34.56\",\"$32,279.04\",\"$34.56\",\"($12,234.24)\",\"$20,044.80\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: The American Tobacco Company
Actual: The American Tobacco Company
|
?Total Correct? |
✅
Correct
Expected: 20044.8
Actual: 20044.8
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_xpytbw
Actual: cust_xpytbw
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: The Andrew Jergens Company
Actual: The Andrew Jergens Company
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/93acb230cd7ee4e0acec1ccd6b87ecfec7fd4ee00eefed9c877e6cfed64538f0) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "The Andrew Jergens Company",
"BuyerName": "The American Tobacco Company",
"TotalAmount": 20044.8,
"NeedsApproval": true,
"CustomerId": "cust_xpytbw",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶jlld0035.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | senderName:
Expected: "Pitney Bowes"
Actual: "Pitney Bowes Inc"
|
Task Input |
{"Name":"jlld0035.png","Invoice":"https://storage.googleapis.com/llm-test-files/c236c9c73bff2261b1adcfce5d6f41e08d308f86c50f9f65beae31d8826ef339"} |
System Output |
{"Name":"jlld0035.png_i3Z","BuyerName":"Tobacco Institute","CustomerId":"cust_wywxbl","SenderName":"Pitney Bowes Inc","TotalAmount":275,"FilterResult":"Valid: clear, legible invoice; no tampering or malicious content detected.","ManagerEmail":"[email protected]","NeedsApproval":false,"BillingAmounts":"[275.00, 275.00, 275.00, 275.00, 0.75]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Tobacco Institute
Actual: Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 275
Actual: 275
|
?Approval Correct? |
✅
Correct
Expected: false
Actual: false
|
?Customer ID Correct? |
✅
Correct
Expected: cust_wywxbl
Actual: cust_wywxbl
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Pitney Bowes
Actual: Pitney Bowes Inc
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/c236c9c73bff2261b1adcfce5d6f41e08d308f86c50f9f65beae31d8826ef339) |
?Partial Extraction Match? |
senderName:
Expected: "Pitney Bowes"
Actual: "Pitney Bowes Inc"
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Pitney Bowes",
"BuyerName": "Tobacco Institute",
"TotalAmount": 275,
"NeedsApproval": false,
"CustomerId": "cust_wywxbl",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶jnbn0018.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | senderName:
Expected: "Bozell"
Actual: "Bozell Worldwide, Inc."
|
Task Input |
{"Name":"jnbn0018.png","Invoice":"https://storage.googleapis.com/llm-test-files/b47f22dcd4ff97ac4e4c490acb64a00199b067995e835ab4229219d2f4483f48"} |
System Output |
{"Name":"jnbn0018.png__SC","BuyerName":"Lorillard Tobacco Company Inc.","CustomerId":"cust_ljmyto","SenderName":"Bozell Worldwide, Inc.","TotalAmount":2305.35,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[2305.35,0.0,0.0,2305.35,25419.86,2792.3,6127.6,5516.87,4551.54,2055.52,-4551.54,7417.55,482.49,1027.53]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard Tobacco Company Inc.
Actual: Lorillard Tobacco Company Inc.
|
?Total Correct? |
✅
Correct
Expected: 2305.35
Actual: 2305.35
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_ljmyto
Actual: cust_ljmyto
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Bozell
Actual: Bozell Worldwide, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/b47f22dcd4ff97ac4e4c490acb64a00199b067995e835ab4229219d2f4483f48) |
?Partial Extraction Match? |
senderName:
Expected: "Bozell"
Actual: "Bozell Worldwide, Inc."
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Bozell",
"BuyerName": "Lorillard Tobacco Company Inc.",
"TotalAmount": 2305.35,
"NeedsApproval": true,
"CustomerId": "cust_ljmyto",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶jygm0045.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"jygm0045.png","Invoice":"https://storage.googleapis.com/llm-test-files/a0729041ddda66fb5de7a4d698c290ae35b9e03feedb12e20551d825a8cee477"} |
System Output |
{"Name":"jygm0045.png_pNe","BuyerName":"The Tobacco Institute","CustomerId":"cust_rnbufp","SenderName":"Peake Printers, Inc.","TotalAmount":525,"FilterResult":"Valid - legible, no tampering or malicious content observed.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"525.000\",\"525.00\",\"525.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: The Tobacco Institute
Actual: The Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 525
Actual: 525
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_rnbufp
Actual: cust_rnbufp
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Peake Printers, Inc.
Actual: Peake Printers, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/a0729041ddda66fb5de7a4d698c290ae35b9e03feedb12e20551d825a8cee477) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Peake Printers, Inc.",
"BuyerName": "The Tobacco Institute",
"TotalAmount": 525,
"NeedsApproval": true,
"CustomerId": "cust_rnbufp",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶jykj0071.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"jykj0071.png","Invoice":"https://storage.googleapis.com/llm-test-files/c9cf6f2a1da216d48a5758713138085730425ce012cc6a6a180904f7a06c43a9"} |
System Output |
{"Name":"jykj0071.png_PXI","BuyerName":"Philip Morris","CustomerId":"cust_mxmfmj","SenderName":"Lyford Strategy + Communications","TotalAmount":3000,"FilterResult":"Valid: legible and appears authentic; no tampering or malicious content detected.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"$3,000.00\",\"3,000.00\",\"3,000.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Philip Morris
Actual: Philip Morris
|
?Total Correct? |
✅
Correct
Expected: 3000
Actual: 3000
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_mxmfmj
Actual: cust_mxmfmj
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Lyford Strategy + Communications
Actual: Lyford Strategy + Communications
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/c9cf6f2a1da216d48a5758713138085730425ce012cc6a6a180904f7a06c43a9) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Lyford Strategy + Communications",
"BuyerName": "Philip Morris",
"TotalAmount": 3000,
"NeedsApproval": true,
"CustomerId": "cust_mxmfmj",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶kjkv0031.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"kjkv0031.png","Invoice":"https://storage.googleapis.com/llm-test-files/b754b77e2e5b206de6460e553909bd0df5b2a84634e21b14f2e6b5c10a0ef9f5"} |
System Output |
{"Name":"kjkv0031.png__ko","BuyerName":"Philip Morris Inc.","CustomerId":"cust_ujwjmw","SenderName":"Gary M. Reynolds & Associates, Inc.","TotalAmount":21965.51,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[1669.98, 2448.00, 295.68, 2066.36, 400.00, 604.30, 14481.19, 21965.51]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Philip Morris Inc.
Actual: Philip Morris Inc.
|
?Total Correct? |
✅
Correct
Expected: 21965.51
Actual: 21965.51
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_ujwjmw
Actual: cust_ujwjmw
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Gary M. Reynolds & Associates, Inc.
Actual: Gary M. Reynolds & Associates, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/b754b77e2e5b206de6460e553909bd0df5b2a84634e21b14f2e6b5c10a0ef9f5) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Gary M. Reynolds & Associates, Inc.",
"BuyerName": "Philip Morris Inc.",
"TotalAmount": 21965.51,
"NeedsApproval": true,
"CustomerId": "cust_ujwjmw",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶ktdb0046.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | buyerName:
Expected: "Tobacco Institute"
Actual: "Tobacco Institute Testing Lab"
|
Task Input |
{"Name":"ktdb0046.png","Invoice":"https://storage.googleapis.com/llm-test-files/762f010ec99641480607848dd960ad1b684c768524c5477a40a72e1c88f436fd"} |
System Output |
{"Name":"ktdb0046.png_F_u","BuyerName":"Tobacco Institute Testing Lab","CustomerId":"cust_nxfkpn","SenderName":"Xerox Corporation","TotalAmount":48.52,"FilterResult":"Valid: Legible Xerox invoice; no signs of tampering or malicious content.","ManagerEmail":"[email protected]","NeedsApproval":false,"BillingAmounts":"[\"46.21\",\"2.31\",\"48.52\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Tobacco Institute
Actual: Tobacco Institute Testing Lab
|
?Total Correct? |
✅
Correct
Expected: 48.52
Actual: 48.52
|
?Approval Correct? |
✅
Correct
Expected: false
Actual: false
|
?Customer ID Correct? |
✅
Correct
Expected: cust_nxfkpn
Actual: cust_nxfkpn
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Xerox Corporation
Actual: Xerox Corporation
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/762f010ec99641480607848dd960ad1b684c768524c5477a40a72e1c88f436fd) |
?Partial Extraction Match? |
buyerName:
Expected: "Tobacco Institute"
Actual: "Tobacco Institute Testing Lab"
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Xerox Corporation",
"BuyerName": "Tobacco Institute",
"TotalAmount": 48.52,
"NeedsApproval": false,
"CustomerId": "cust_nxfkpn",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶lfdl0024.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"lfdl0024.png","Invoice":"https://storage.googleapis.com/llm-test-files/c52b14b640644a1437438d23fab22409a2fca2a147035854f8b845bf39fa84f0"} |
System Output |
{"Name":"lfdl0024.png_T7i","BuyerName":"Philip Morris USA (MEDIA)","CustomerId":"cust_asogei","SenderName":"E. Morris Communications, Inc.","TotalAmount":7990,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[7990.00]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Philip Morris USA (Media)
Actual: Philip Morris USA (MEDIA)
|
?Total Correct? |
✅
Correct
Expected: 7990
Actual: 7990
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_asogei
Actual: cust_asogei
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: E. Morris Communications, Inc.
Actual: E. Morris Communications, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/c52b14b640644a1437438d23fab22409a2fca2a147035854f8b845bf39fa84f0) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "E. Morris Communications, Inc.",
"BuyerName": "Philip Morris USA (Media)",
"TotalAmount": 7990,
"NeedsApproval": true,
"CustomerId": "cust_asogei",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶lkxd0060.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | senderName:
Expected: "Three Tree Productions"
Actual: "Three Tree Productions, Inc."
|
Task Input |
{"Name":"lkxd0060.png","Invoice":"https://storage.googleapis.com/llm-test-files/37ad2f0108c9d168bf98e49bc4450174937bcc03d3bb2362e3b41c325843889a"} |
System Output |
{"Name":"lkxd0060.png__a8","BuyerName":"GCI Advertising","CustomerId":"cust_aboqxe","SenderName":"Three Tree Productions, Inc.","TotalAmount":2000,"FilterResult":"Valid: legible; no tampering or malicious instructions detected.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[2000.00, 0.00, 2000.00]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: GCI Advertising
Actual: GCI Advertising
|
?Total Correct? |
✅
Correct
Expected: 2000
Actual: 2000
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_aboqxe
Actual: cust_aboqxe
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Three Tree Productions
Actual: Three Tree Productions, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/37ad2f0108c9d168bf98e49bc4450174937bcc03d3bb2362e3b41c325843889a) |
?Partial Extraction Match? |
senderName:
Expected: "Three Tree Productions"
Actual: "Three Tree Productions, Inc."
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Three Tree Productions",
"BuyerName": "GCI Advertising",
"TotalAmount": 2000,
"NeedsApproval": true,
"CustomerId": "cust_aboqxe",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶llbh0038.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"llbh0038.png","Invoice":"https://storage.googleapis.com/llm-test-files/418de34eddf45b31024dc0981f3f00e0da63816bfeb907290cd956fa37bad5c8"} |
System Output |
{"Name":"llbh0038.png_vhy","BuyerName":"Tobacco Institute","CustomerId":"cust_basfwy","SenderName":"Fannon-Luers Associates Inc.","TotalAmount":4496.68,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[2925.00, 1571.68, 0.00, 4496.68]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Tobacco Institute
Actual: Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 4496.68
Actual: 4496.68
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_basfwy
Actual: cust_basfwy
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Fannon-Luers Associates Inc.
Actual: Fannon-Luers Associates Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/418de34eddf45b31024dc0981f3f00e0da63816bfeb907290cd956fa37bad5c8) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Fannon-Luers Associates Inc.",
"BuyerName": "Tobacco Institute",
"TotalAmount": 4496.68,
"NeedsApproval": true,
"CustomerId": "cust_basfwy",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶lswj0034.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | buyerName:
Expected: "Tobacco Institute"
Actual: "TABACCO INSTITUTE"
|
Task Input |
{"Name":"lswj0034.png","Invoice":"https://storage.googleapis.com/llm-test-files/0a4d04e5d019cb1ea0a7ac206991d96dd248a1cac248f6c4ff2c6f5d9ded12d8"} |
System Output |
{"Name":"lswj0034.png_h1H","BuyerName":"TABACCO INSTITUTE","CustomerId":"cust_twvvcc","SenderName":"CDI Temporary Services, Inc.","TotalAmount":93.92,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":false,"BillingAmounts":"[5.87, 46.96, 5.87, 46.96, 93.92]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Tobacco Institute
Actual: TABACCO INSTITUTE
|
?Total Correct? |
✅
Correct
Expected: 93.92
Actual: 93.92
|
?Approval Correct? |
✅
Correct
Expected: false
Actual: false
|
?Customer ID Correct? |
✅
Correct
Expected: cust_twvvcc
Actual: cust_twvvcc
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: CDI Temporary Services, Inc.
Actual: CDI Temporary Services, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/0a4d04e5d019cb1ea0a7ac206991d96dd248a1cac248f6c4ff2c6f5d9ded12d8) |
?Partial Extraction Match? |
buyerName:
Expected: "Tobacco Institute"
Actual: "TABACCO INSTITUTE"
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "CDI Temporary Services, Inc.",
"BuyerName": "Tobacco Institute",
"TotalAmount": 93.92,
"NeedsApproval": false,
"CustomerId": "cust_twvvcc",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶lzph0118.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | senderName:
Expected: "Ayrett, Free & Ginsberg, Inc."
Actual: "Avrett, Free & Ginsberg, Inc."
|
Task Input |
{"Name":"lzph0118.png","Invoice":"https://storage.googleapis.com/llm-test-files/597aec5261ef078372765d582c7bbbf7876768748918883b935b7e86f1ca8bbf"} |
System Output |
{"Name":"lzph0118.png_4gF","BuyerName":"Lorillard Tobacco Co.","CustomerId":"cust_ycoxvf","SenderName":"Avrett, Free & Ginsberg, Inc.","TotalAmount":541.25,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"0.00\",\"500.00\",\"41.25\",\"541.25\",\"1,786.13\",\"1,053.99\",\"732.14\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard Tobacco Co.
Actual: Lorillard Tobacco Co.
|
?Total Correct? |
✅
Correct
Expected: 541.25
Actual: 541.25
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_ycoxvf
Actual: cust_ycoxvf
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Ayrett, Free & Ginsberg, Inc.
Actual: Avrett, Free & Ginsberg, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/597aec5261ef078372765d582c7bbbf7876768748918883b935b7e86f1ca8bbf) |
?Partial Extraction Match? |
senderName:
Expected: "Ayrett, Free & Ginsberg, Inc."
Actual: "Avrett, Free & Ginsberg, Inc."
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Ayrett, Free & Ginsberg, Inc.",
"BuyerName": "Lorillard Tobacco Co.",
"TotalAmount": 541.25,
"NeedsApproval": true,
"CustomerId": "cust_ycoxvf",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶mhbp0055.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | senderName:
Expected: "Universal Leaf Tobacco Company"
Actual: "Universal Leaf Tobacco Company, Inc."
|
Task Input |
{"Name":"mhbp0055.png","Invoice":"https://storage.googleapis.com/llm-test-files/14e8c1c6be0d4ce671f442740d8722ea0babf52ae88e8c461787f074849a5a6b"} |
System Output |
{"Name":"mhbp0055.png_ftZ","BuyerName":"PM AUSTRALIA - C/O PM USA","CustomerId":"cust_yefulg","SenderName":"Universal Leaf Tobacco Company, Inc.","TotalAmount":258,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":false,"BillingAmounts":"[\"60.00\",\"180.00\",\"19.00\",\"57.00\",\"7.00\",\"21.00\",\"258.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: PM AUSTRALIA - C/O PM USA
Actual: PM AUSTRALIA - C/O PM USA
|
?Total Correct? |
✅
Correct
Expected: 258
Actual: 258
|
?Approval Correct? |
✅
Correct
Expected: false
Actual: false
|
?Customer ID Correct? |
✅
Correct
Expected: cust_yefulg
Actual: cust_yefulg
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Universal Leaf Tobacco Company
Actual: Universal Leaf Tobacco Company, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/14e8c1c6be0d4ce671f442740d8722ea0babf52ae88e8c461787f074849a5a6b) |
?Partial Extraction Match? |
senderName:
Expected: "Universal Leaf Tobacco Company"
Actual: "Universal Leaf Tobacco Company, Inc."
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Universal Leaf Tobacco Company",
"BuyerName": "PM AUSTRALIA - C/O PM USA",
"TotalAmount": 258,
"NeedsApproval": false,
"CustomerId": "cust_yefulg",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶mjbw0129.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"mjbw0129.png","Invoice":"https://storage.googleapis.com/llm-test-files/6ed0546d4d539773b0dab6c8a8ef0a2a2517b24bf45f19fc6fc661041bb2457e"} |
System Output |
{"Name":"mjbw0129.png_wkE","BuyerName":"Lorillard Media Services","CustomerId":"cust_ojbuny","SenderName":"The Lamar Corporation","TotalAmount":366.67,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[440.00, 73.33, 366.67]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard Media Services
Actual: Lorillard Media Services
|
?Total Correct? |
✅
Correct
Expected: 366.67
Actual: 366.67
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_ojbuny
Actual: cust_ojbuny
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: The LAMAR Corporation
Actual: The Lamar Corporation
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/6ed0546d4d539773b0dab6c8a8ef0a2a2517b24bf45f19fc6fc661041bb2457e) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "The LAMAR Corporation",
"BuyerName": "Lorillard Media Services",
"TotalAmount": 366.67,
"NeedsApproval": true,
"CustomerId": "cust_ojbuny",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶mjfm0044.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"mjfm0044.png","Invoice":"https://storage.googleapis.com/llm-test-files/63afae46f50cfbc252b7232ec4b5d35cefe986d02520d3c20052a5f582d32119"} |
System Output |
{"Name":"mjfm0044.png_vx2","BuyerName":"Tobacco Institute","CustomerId":"cust_bvrtwu","SenderName":"Sparkman and Bartholomew Associates, Inc.","TotalAmount":8282.76,"FilterResult":"Valid — Legible invoice; no tampering or malicious content observed.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[392.00,2240.00,889.41,2800.16,439.20,980.00,73.15,7813.92,468.84,8282.76]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Tobacco Institute
Actual: Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 8282.76
Actual: 8282.76
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_bvrtwu
Actual: cust_bvrtwu
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Sparkman and Bartholomew Associates, Inc.
Actual: Sparkman and Bartholomew Associates, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/63afae46f50cfbc252b7232ec4b5d35cefe986d02520d3c20052a5f582d32119) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Sparkman and Bartholomew Associates, Inc.",
"BuyerName": "Tobacco Institute",
"TotalAmount": 8282.76,
"NeedsApproval": true,
"CustomerId": "cust_bvrtwu",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶mjpy0094.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"mjpy0094.png","Invoice":"https://storage.googleapis.com/llm-test-files/072d50b4113af9805c711a4007948411a142b635942137d11723fa0043a7a497"} |
System Output |
{"Name":"mjpy0094.png_YxL","BuyerName":"Lorillard, Inc.","CustomerId":"cust_moodww","SenderName":"IIT Research Institute","TotalAmount":5500,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"$5,500.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard, Inc.
Actual: Lorillard, Inc.
|
?Total Correct? |
✅
Correct
Expected: 5500
Actual: 5500
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_moodww
Actual: cust_moodww
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: IIT Research Institute
Actual: IIT Research Institute
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/072d50b4113af9805c711a4007948411a142b635942137d11723fa0043a7a497) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "IIT Research Institute",
"BuyerName": "Lorillard, Inc.",
"TotalAmount": 5500,
"NeedsApproval": true,
"CustomerId": "cust_moodww",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶mpmb0100.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"mpmb0100.png","Invoice":"https://storage.googleapis.com/llm-test-files/421fec55f4ec8517c154291ea48fe56367f331cd990430c03aa566ab4dfa5d7d"} |
System Output |
{"Name":"mpmb0100.png_mSP","BuyerName":"McLane Southern California, Inc","CustomerId":"cust_bdyexs","SenderName":"R.J. Reynolds Tobacco Company","TotalAmount":17069.73,"FilterResult":"Valid — legible invoice, no tampering or malicious content observed.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[335.70,335.70,335.70,335.70,14435.10,14435.10,14435.10,14435.10,-28870.20,28870.20,-938.28,-938.28,26993.64,-377.91,-9546.00,-1316.19,17069.73,18385.92]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: MCLANE SOUTHERN CALIFORNIA, INC
Actual: McLane Southern California, Inc
|
?Total Correct? |
✅
Correct
Expected: 17069.73
Actual: 17069.73
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_bdyexs
Actual: cust_bdyexs
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: R.J. Reynolds Tobacco Company
Actual: R.J. Reynolds Tobacco Company
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/421fec55f4ec8517c154291ea48fe56367f331cd990430c03aa566ab4dfa5d7d) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "R.J. Reynolds Tobacco Company",
"BuyerName": "MCLANE SOUTHERN CALIFORNIA, INC",
"TotalAmount": 17069.73,
"NeedsApproval": true,
"CustomerId": "cust_bdyexs",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶mzkd0148.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"mzkd0148.png","Invoice":"https://storage.googleapis.com/llm-test-files/15bff433b71ac603842098c91154ad2e077db9d3adc8eee0bef4ab697ad7aac6"} |
System Output |
{"Name":"mzkd0148.png_1Sm","BuyerName":"The Tobacco Institute","CustomerId":"cust_ecmvpc","SenderName":"MAC SYSTEMS","TotalAmount":1932.63,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[1548.51, 67.61, 67.61, 36.00, 36.00, 70.45, 70.45, 36.00, 1932.63]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: The Tobacco Institute
Actual: The Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 1932.63
Actual: 1932.63
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_ecmvpc
Actual: cust_ecmvpc
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: MAC SYSTEMS
Actual: MAC SYSTEMS
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/15bff433b71ac603842098c91154ad2e077db9d3adc8eee0bef4ab697ad7aac6) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "MAC SYSTEMS",
"BuyerName": "The Tobacco Institute",
"TotalAmount": 1932.63,
"NeedsApproval": true,
"CustomerId": "cust_ecmvpc",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶nfyb0108.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | buyerName:
Expected: "Dr. J. Daniel Heck, Lorillard, Inc."
Actual: "Lorillard, Inc."
|
Task Input |
{"Name":"nfyb0108.png","Invoice":"https://storage.googleapis.com/llm-test-files/d4dede0dcbbead8b2fc3701a0a0ff57406001603340b6d4fb736cc5752436c7f"} |
System Output |
{"Name":"nfyb0108.png_4r1","BuyerName":"Lorillard, Inc.","CustomerId":"cust_cvpqaj","SenderName":"Hazleton Laboratories America, Inc.","TotalAmount":1800,"FilterResult":"Valid — legible invoice; no tampering or malicious instructions observed.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"$1,800.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Dr. J. Daniel Heck, Lorillard, Inc.
Actual: Lorillard, Inc.
|
?Total Correct? |
✅
Correct
Expected: 1800
Actual: 1800
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_cvpqaj
Actual: cust_cvpqaj
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Hazleton Laboratories America, Inc.
Actual: Hazleton Laboratories America, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/d4dede0dcbbead8b2fc3701a0a0ff57406001603340b6d4fb736cc5752436c7f) |
?Partial Extraction Match? |
buyerName:
Expected: "Dr. J. Daniel Heck, Lorillard, Inc."
Actual: "Lorillard, Inc."
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Hazleton Laboratories America, Inc.",
"BuyerName": "Dr. J. Daniel Heck, Lorillard, Inc.",
"TotalAmount": 1800,
"NeedsApproval": true,
"CustomerId": "cust_cvpqaj",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶nlbw0181.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"nlbw0181.png","Invoice":"https://storage.googleapis.com/llm-test-files/0e7f9d72fa195ae35b7512a1d9e4f5bad231a5c20f718387dd254ea0fee359fc"} |
System Output |
{"Name":"nlbw0181.png_gIs","BuyerName":"Lorillard Tobacco Company","CustomerId":"cust_uzmkjy","SenderName":"Management Science Associates, Inc.","TotalAmount":493,"FilterResult":"Valid – legible, appears authentic, no malicious instructions detected.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"$493.00\",\"$493.00\",\"$493 00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard Tobacco Company
Actual: Lorillard Tobacco Company
|
?Total Correct? |
✅
Correct
Expected: 493
Actual: 493
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_uzmkjy
Actual: cust_uzmkjy
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Management Science Associates, Inc.
Actual: Management Science Associates, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/0e7f9d72fa195ae35b7512a1d9e4f5bad231a5c20f718387dd254ea0fee359fc) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Management Science Associates, Inc.",
"BuyerName": "Lorillard Tobacco Company",
"TotalAmount": 493,
"NeedsApproval": true,
"CustomerId": "cust_uzmkjy",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶nqpl0094.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | buyerName:
Expected: "Duke Univ Private Adjudic..."
Actual: "Duke University Private Adjudic..."
|
Task Input |
{"Name":"nqpl0094.png","Invoice":"https://storage.googleapis.com/llm-test-files/bd4ff4fb08001901ab51029bfad240eacd8e9a6871f8e5e54b77579aa1071cda"} |
System Output |
{"Name":"nqpl0094.png_Qxz","BuyerName":"Duke University Private Adjudication Center","CustomerId":"cust_dyhvvy","SenderName":"Airborne Express","TotalAmount":38,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":false,"BillingAmounts":"[\"$38.00\",\"38.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Duke Univ Private Adjudic...
Actual: Duke University Private Adjudic...
|
?Total Correct? |
|
?Approval Correct? |
✅
Correct
Expected: false
Actual: false
|
?Customer ID Correct? |
✅
Correct
Expected: cust_dyhvvy
Actual: cust_dyhvvy
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Airborne Express
Actual: Airborne Express
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/bd4ff4fb08001901ab51029bfad240eacd8e9a6871f8e5e54b77579aa1071cda) |
?Partial Extraction Match? |
buyerName:
Expected: "Duke Univ Private Adjudic..."
Actual: "Duke University Private Adjudic..."
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Airborne Express",
"BuyerName": "Duke Univ Private Adjudication Ctr",
"TotalAmount": 38,
"NeedsApproval": false,
"CustomerId": "cust_dyhvvy",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶nsjj0087.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | buyerName:
Expected: "R. J. Reynolds Tobacco Company, Bowman Gray Technical Center"
Actual: "R. J. Reynolds Tobacco Company"
senderName:
Expected: "Battelle Memorial Institute (Pacific Northwest Laboratories)"
Actual: "Battelle Pacific Northwest Laboratories"
|
Task Input |
{"Name":"nsjj0087.png","Invoice":"https://storage.googleapis.com/llm-test-files/646029561e556fcdc06948a23eef580728008ac6a28685ea62873984c47e43f1"} |
System Output |
{"Name":"nsjj0087.png_20w","BuyerName":"R. J. Reynolds Tobacco Company","CustomerId":"cust_zuaxgr","SenderName":"Battelle Pacific Northwest Laboratories","TotalAmount":115754,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"$107,879.00\",\"$7,875.00\",\"$115,754.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: R. J. Reynolds Tobacco Company, Bowman Gray Technical Center
Actual: R. J. Reynolds Tobacco Company
|
?Total Correct? |
✅
Correct
Expected: 115754
Actual: 115754
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_zuaxgr
Actual: cust_zuaxgr
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Battelle Memorial Institute (Pacific Northwest Laboratories)
Actual: Battelle Pacific Northwest Laboratories
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/646029561e556fcdc06948a23eef580728008ac6a28685ea62873984c47e43f1) |
?Partial Extraction Match? |
buyerName:
Expected: "R. J. Reynolds Tobacco Company, Bowman Gray Technical Center"
Actual: "R. J. Reynolds Tobacco Company"
senderName:
Expected: "Battelle Memorial Institute (Pacific Northwest Laboratories)"
Actual: "Battelle Pacific Northwest Laboratories"
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Battelle Memorial Institute (Pacific Northwest Laboratories)",
"BuyerName": "R. J. Reynolds Tobacco Company, Bowman Gray Technical Center",
"TotalAmount": 115754,
"NeedsApproval": true,
"CustomerId": "cust_zuaxgr",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶ntxf0107.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"ntxf0107.png","Invoice":"https://storage.googleapis.com/llm-test-files/c691e8c82a8b5e37cce9b060916718f4a5d3dd74196faf6f1a6fbc74c3b0978e"} |
System Output |
{"Name":"ntxf0107.png_kwc","BuyerName":"Fleishman Hillard","CustomerId":"cust_dyhvvy","SenderName":"Airborne Express","TotalAmount":8.5,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":false,"BillingAmounts":"[\"8.50\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: FLEISHMAN HILLARD
Actual: Fleishman Hillard
|
?Total Correct? |
✅
Correct
Expected: 8.5
Actual: 8.5
|
?Approval Correct? |
✅
Correct
Expected: false
Actual: false
|
?Customer ID Correct? |
✅
Correct
Expected: cust_dyhvvy
Actual: cust_dyhvvy
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: AiRBORNE EXPRESS
Actual: Airborne Express
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/c691e8c82a8b5e37cce9b060916718f4a5d3dd74196faf6f1a6fbc74c3b0978e) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "AiRBORNE EXPRESS",
"BuyerName": "FLEISHMAN HILLARD",
"TotalAmount": 8.5,
"NeedsApproval": false,
"CustomerId": "cust_dyhvvy",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶nzjh0042.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"nzjh0042.png","Invoice":"https://storage.googleapis.com/llm-test-files/c5faada12f2c469bae0337810a7435caff950745d4a7723226f249fb303c126c"} |
System Output |
{"Name":"nzjh0042.png_urk","BuyerName":"Tobacco Institute","CustomerId":"cust_dyhvvy","SenderName":"Airborne Express","TotalAmount":442.49,"FilterResult":"Valid invoice (legible, no tampering/malicious content detected).","ManagerEmail":"[email protected]","NeedsApproval":false,"BillingAmounts":"[11.73, 11.73, 58.46, 23.46, 58.46, 442.49]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Tobacco Institute
Actual: Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 442.49
Actual: 442.49
|
?Approval Correct? |
✅
Correct
Expected: false
Actual: false
|
?Customer ID Correct? |
✅
Correct
Expected: cust_dyhvvy
Actual: cust_dyhvvy
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Airborne Express
Actual: Airborne Express
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/c5faada12f2c469bae0337810a7435caff950745d4a7723226f249fb303c126c) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Airborne Express",
"BuyerName": "Tobacco Institute",
"TotalAmount": 442.49,
"NeedsApproval": false,
"CustomerId": "cust_dyhvvy",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶pjbl0010.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"pjbl0010.png","Invoice":"https://storage.googleapis.com/llm-test-files/7737fa4fc89c280193a2e38700b7c6b622a76879051d2a2f7d05062fb156bb11"} |
System Output |
{"Name":"pjbl0010.png_wtA","BuyerName":"Lorillard Research Center","CustomerId":"cust_dnwalc","SenderName":"Microbiological Associates Inc.","TotalAmount":2002.24,"FilterResult":"Valid — legible; no tampering or malicious content detected.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"$2,002.24\",\"2,002.24\",\"2,002.24\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard Research Center
Actual: Lorillard Research Center
|
?Total Correct? |
✅
Correct
Expected: 2002.24
Actual: 2002.24
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_dnwalc
Actual: cust_dnwalc
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Microbiological Associates Inc.
Actual: Microbiological Associates Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/7737fa4fc89c280193a2e38700b7c6b622a76879051d2a2f7d05062fb156bb11) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Microbiological Associates Inc.",
"BuyerName": "Lorillard Research Center",
"TotalAmount": 2002.24,
"NeedsApproval": true,
"CustomerId": "cust_dnwalc",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶pjpk0014.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"pjpk0014.png","Invoice":"https://storage.googleapis.com/llm-test-files/a2fcce37fde47d8073fe3ff56c61e69795bb12b1f8a6e260f25ec6e3b8ec5f74"} |
System Output |
{"Name":"pjpk0014.png_dJn","BuyerName":"Philip Morris Incorporated","CustomerId":"cust_ujwjmw","SenderName":"Gary M. Reynolds & Assoc.","TotalAmount":163972.84,"FilterResult":"Valid — invoice appears authentic and legible; no tampering or malicious content observed.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[9432.89, 5900.00, 47.00, 90869.85, 4978.70, 20507.95, 32236.45, 163972.84]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Philip Morris Incorporated
Actual: Philip Morris Incorporated
|
?Total Correct? |
✅
Correct
Expected: 163972.84
Actual: 163972.84
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_ujwjmw
Actual: cust_ujwjmw
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Gary M. Reynolds & Assoc.
Actual: Gary M. Reynolds & Assoc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/a2fcce37fde47d8073fe3ff56c61e69795bb12b1f8a6e260f25ec6e3b8ec5f74) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Gary M. Reynolds & Assoc.",
"BuyerName": "Philip Morris Incorporated",
"TotalAmount": 163972.84,
"NeedsApproval": true,
"CustomerId": "cust_ujwjmw",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶pjvc0030.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"pjvc0030.png","Invoice":"https://storage.googleapis.com/llm-test-files/a620b4666a262826fb927d154d617c558d8f4172fe11cf683c93f4fff70753d9"} |
System Output |
{"Name":"pjvc0030.png_W9o","BuyerName":"The Tobacco Institute","CustomerId":"cust_dcsbzs","SenderName":"Newsletter Services, Inc.","TotalAmount":396.07,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"$254.25\",\"$15.25\",\"$126.57\",\"$396.07\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: THE TOBACCO INSTITUTE
Actual: The Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 396.07
Actual: 396.07
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_dcsbzs
Actual: cust_dcsbzs
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: newsletter services, inc.
Actual: Newsletter Services, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/a620b4666a262826fb927d154d617c558d8f4172fe11cf683c93f4fff70753d9) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "newsletter services, inc.",
"BuyerName": "THE TOBACCO INSTITUTE",
"TotalAmount": 396.07,
"NeedsApproval": true,
"CustomerId": "cust_dcsbzs",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶pjxf0107.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"pjxf0107.png","Invoice":"https://storage.googleapis.com/llm-test-files/e3c27f680d1b797062c20e22db7551af1ef4915c6e98c1610e9e75bef10d0520"} |
System Output |
{"Name":"pjxf0107.png_Rq6","BuyerName":"Covington & Burling","CustomerId":"cust_bpuroc","SenderName":"Healthy Buildings International, Inc.","TotalAmount":3421,"FilterResult":"Valid — legible, no tampering or malicious content observed","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"$2,492.00\",\"610.00\",\"198.00\",\"21.00\",\"100.00\",\"$3,421.00\",\"$3,421.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Covington & Burling
Actual: Covington & Burling
|
?Total Correct? |
✅
Correct
Expected: 3421
Actual: 3421
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_bpuroc
Actual: cust_bpuroc
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Healthy Buildings International, Inc.
Actual: Healthy Buildings International, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/e3c27f680d1b797062c20e22db7551af1ef4915c6e98c1610e9e75bef10d0520) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Healthy Buildings International, Inc.",
"BuyerName": "Covington & Burling",
"TotalAmount": 3421,
"NeedsApproval": true,
"CustomerId": "cust_bpuroc",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶pkhg0049.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"pkhg0049.png","Invoice":"https://storage.googleapis.com/llm-test-files/858b9de3ebc5cacebf30dfb703f209051b2a7bd5ece9cc5357ff44b2fcdd4741"} |
System Output |
{"Name":"pkhg0049.png_Ab5","BuyerName":"Philip Morris","CustomerId":"cust_yihnvm","SenderName":"Home Testing Institute","TotalAmount":37405,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"$1,210\",\"$37,405.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Philip Morris
Actual: Philip Morris
|
?Total Correct? |
✅
Correct
Expected: 37405
Actual: 37405
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_yihnvm
Actual: cust_yihnvm
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Home Testing Institute
Actual: Home Testing Institute
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/858b9de3ebc5cacebf30dfb703f209051b2a7bd5ece9cc5357ff44b2fcdd4741) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Home Testing Institute",
"BuyerName": "Philip Morris",
"TotalAmount": 37405,
"NeedsApproval": true,
"CustomerId": "cust_yihnvm",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶ppnx0147.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"ppnx0147.png","Invoice":"https://storage.googleapis.com/llm-test-files/c0e981897d82a8008087cc0987954ac658bbf59191e7d67687e96b6ac4a282dd"} |
System Output |
{"Name":"ppnx0147.png_1ez","BuyerName":"The Tobacco Institute","CustomerId":"cust_xezwxc","SenderName":"Broad Ripple Trophy Center","TotalAmount":56.18,"FilterResult":"Valid - clear, legible invoice; no tampering or malicious content observed.","ManagerEmail":"[email protected]","NeedsApproval":false,"BillingAmounts":"[26.75, 53.50, 53.50, 2.68, 56.18]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: The Tobacco Institute
Actual: The Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 56.18
Actual: 56.18
|
?Approval Correct? |
✅
Correct
Expected: false
Actual: false
|
?Customer ID Correct? |
✅
Correct
Expected: cust_xezwxc
Actual: cust_xezwxc
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Broad Ripple Trophy Center
Actual: Broad Ripple Trophy Center
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/c0e981897d82a8008087cc0987954ac658bbf59191e7d67687e96b6ac4a282dd) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Broad Ripple Trophy Center",
"BuyerName": "The Tobacco Institute",
"TotalAmount": 56.18,
"NeedsApproval": false,
"CustomerId": "cust_xezwxc",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶ptkv0039.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"ptkv0039.png","Invoice":"https://storage.googleapis.com/llm-test-files/89b4f957fd74cf40fa688aadd8f89f4058c57b35383e24e639fb083ff287a762"} |
System Output |
{"Name":"ptkv0039.png_idC","BuyerName":"LEO BURNETT USA","CustomerId":"cust_eghisc","SenderName":"ELLER MEDIA COMPANY","TotalAmount":1458.27,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"1,750.00\",\"(291.73)\",\"1,458.27\",\"1,458.27\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Leo Burnett USA
Actual: LEO BURNETT USA
|
?Total Correct? |
✅
Correct
Expected: 1458.27
Actual: 1458.27
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_eghisc
Actual: cust_eghisc
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Eller Media Company
Actual: ELLER MEDIA COMPANY
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/89b4f957fd74cf40fa688aadd8f89f4058c57b35383e24e639fb083ff287a762) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Eller Media Company",
"BuyerName": "Leo Burnett USA",
"TotalAmount": 1458.27,
"NeedsApproval": true,
"CustomerId": "cust_eghisc",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶pyhj0042.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"pyhj0042.png","Invoice":"https://storage.googleapis.com/llm-test-files/6ae71fa9959f9f3a65e054abbc33cfad03efa7d4a6d67e567ccc3d6130bb9b7e"} |
System Output |
{"Name":"pyhj0042.png_bjP","BuyerName":"Tobacco Institute","CustomerId":"cust_wywxbl","SenderName":"Pitney Bowes","TotalAmount":93.09,"FilterResult":"Valid - Legible Pitney Bowes rental invoice; no tampering/malicious content observed.","ManagerEmail":"[email protected]","NeedsApproval":false,"BillingAmounts":"[93.09, 87.00, 3.48, 2.61, 0.00, 6.09, 1000.00]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Tobacco Institute
Actual: Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 93.09
Actual: 93.09
|
?Approval Correct? |
✅
Correct
Expected: false
Actual: false
|
?Customer ID Correct? |
✅
Correct
Expected: cust_wywxbl
Actual: cust_wywxbl
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Pitney Bowes
Actual: Pitney Bowes
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/6ae71fa9959f9f3a65e054abbc33cfad03efa7d4a6d67e567ccc3d6130bb9b7e) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Pitney Bowes",
"BuyerName": "Tobacco Institute",
"TotalAmount": 93.09,
"NeedsApproval": false,
"CustomerId": "cust_wywxbl",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶pymp0031.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"pymp0031.png","Invoice":"https://storage.googleapis.com/llm-test-files/eb3cc2650f0ecca86536688cb6e0cf2013d32e20cc7cd5314d4c750607290482"} |
System Output |
{"Name":"pymp0031.png_fEV","BuyerName":"Tobacco Institute","CustomerId":"cust_ynbhwy","SenderName":"Temporary Resources, Inc.","TotalAmount":470.4,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":false,"BillingAmounts":"[470.40, 12.80]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Tobacco Institute
Actual: Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 470.4
Actual: 470.4
|
?Approval Correct? |
✅
Correct
Expected: false
Actual: false
|
?Customer ID Correct? |
✅
Correct
Expected: cust_ynbhwy
Actual: cust_ynbhwy
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Temporary Resources, Inc.
Actual: Temporary Resources, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/eb3cc2650f0ecca86536688cb6e0cf2013d32e20cc7cd5314d4c750607290482) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Temporary Resources, Inc.",
"BuyerName": "Tobacco Institute",
"TotalAmount": 470.4,
"NeedsApproval": false,
"CustomerId": "cust_ynbhwy",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶qggm0045.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"qggm0045.png","Invoice":"https://storage.googleapis.com/llm-test-files/3534b711bc796ed09dce914d95cf6de10342c9871d10d41190fe261cda5fce4c"} |
System Output |
{"Name":"qggm0045.png_RVT","BuyerName":"Tobacco Institute","CustomerId":"cust_basfwy","SenderName":"Fannon-Luers Associates Inc.","TotalAmount":144,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"72.00\", \"72.00\", \"0.00\", \"144.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Tobacco Institute
Actual: Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 144
Actual: 144
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_basfwy
Actual: cust_basfwy
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Fannon-Luers Associates Inc.
Actual: Fannon-Luers Associates Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/3534b711bc796ed09dce914d95cf6de10342c9871d10d41190fe261cda5fce4c) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Fannon-Luers Associates Inc.",
"BuyerName": "Tobacco Institute",
"TotalAmount": 144,
"NeedsApproval": true,
"CustomerId": "cust_basfwy",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶qgmh0008.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"qgmh0008.png","Invoice":"https://storage.googleapis.com/llm-test-files/7a1bb5c6aadbd7b6c511d97b35c5aae636e9014291bbcb376fe4456bea8a7145"} |
System Output |
{"Name":"qgmh0008.png_sm_","BuyerName":"Philip Morris USA","CustomerId":"cust_sofssp","SenderName":"Labstat International Inc.","TotalAmount":468870,"FilterResult":"Valid invoice – legible, no tampering or malicious content detected.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"$294,764.00\",\"$169,736.00\",\"$4,370.00\",\"$468,870.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Philip Morris USA
Actual: Philip Morris USA
|
?Total Correct? |
✅
Correct
Expected: 468870
Actual: 468870
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_sofssp
Actual: cust_sofssp
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Labstat International Inc.
Actual: Labstat International Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/7a1bb5c6aadbd7b6c511d97b35c5aae636e9014291bbcb376fe4456bea8a7145) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Labstat International Inc.",
"BuyerName": "Philip Morris USA",
"TotalAmount": 468870,
"NeedsApproval": true,
"CustomerId": "cust_sofssp",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶qkmp0115.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | senderName:
Expected: "Litton Bionetics"
Actual: "Litton Bionetics, Inc."
|
Task Input |
{"Name":"qkmp0115.png","Invoice":"https://storage.googleapis.com/llm-test-files/5c181c3bdca3373dd3dbaaa62b676948515daa206c146ff9fde2a5dab87e8a72"} |
System Output |
{"Name":"qkmp0115.png_Mrv","BuyerName":"Lorillard Research Center","CustomerId":"cust_xvhcey","SenderName":"Litton Bionetics, Inc.","TotalAmount":900,"FilterResult":"Valid invoice: legible, consistent formatting, authentic fields, no tampering or malicious content observed.","ManagerEmail":"[email protected]","NeedsApproval":false,"BillingAmounts":"[\"900.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard Research Center
Actual: Lorillard Research Center
|
?Total Correct? |
✅
Correct
Expected: 900
Actual: 900
|
?Approval Correct? |
✅
Correct
Expected: false
Actual: false
|
?Customer ID Correct? |
✅
Correct
Expected: cust_xvhcey
Actual: cust_xvhcey
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Litton Bionetics
Actual: Litton Bionetics, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/5c181c3bdca3373dd3dbaaa62b676948515daa206c146ff9fde2a5dab87e8a72) |
?Partial Extraction Match? |
senderName:
Expected: "Litton Bionetics"
Actual: "Litton Bionetics, Inc."
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Litton Bionetics",
"BuyerName": "Lorillard Research Center",
"TotalAmount": 900,
"NeedsApproval": false,
"CustomerId": "cust_xvhcey",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶qmjp0115.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"qmjp0115.png","Invoice":"https://storage.googleapis.com/llm-test-files/b2a3806f9e2f67b61f49d3a8faf304b3b8f5eb65cffee867f650504f77b140e2"} |
System Output |
{"Name":"qmjp0115.png_Gnd","BuyerName":"Lorillard Research Center","CustomerId":"cust_cvpqaj","SenderName":"Hazleton Laboratories America, Inc.","TotalAmount":1350,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"$1,350.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard Research Center
Actual: Lorillard Research Center
|
?Total Correct? |
✅
Correct
Expected: 1350
Actual: 1350
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_cvpqaj
Actual: cust_cvpqaj
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Hazleton Laboratories America, Inc.
Actual: Hazleton Laboratories America, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/b2a3806f9e2f67b61f49d3a8faf304b3b8f5eb65cffee867f650504f77b140e2) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Hazleton Laboratories America, Inc.",
"BuyerName": "Lorillard Research Center",
"TotalAmount": 1350,
"NeedsApproval": true,
"CustomerId": "cust_cvpqaj",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶qnvf0025.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"qnvf0025.png","Invoice":"https://storage.googleapis.com/llm-test-files/4df945d385d80379918b9c16cf2a8647814ddba79781b4c22f6a3e63caf0c731"} |
System Output |
{"Name":"qnvf0025.png_pwb","BuyerName":"Philip Morris Incorporated","CustomerId":"cust_kwuqnj","SenderName":"The Denver Union Terminal Railway Company","TotalAmount":715,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":false,"BillingAmounts":"[715.00, 715.00, 715.00]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Philip Morris Incorporated
Actual: Philip Morris Incorporated
|
?Total Correct? |
✅
Correct
Expected: 715
Actual: 715
|
?Approval Correct? |
✅
Correct
Expected: false
Actual: false
|
?Customer ID Correct? |
✅
Correct
Expected: cust_kwuqnj
Actual: cust_kwuqnj
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: The Denver Union Terminal Railway Company
Actual: The Denver Union Terminal Railway Company
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/4df945d385d80379918b9c16cf2a8647814ddba79781b4c22f6a3e63caf0c731) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "The Denver Union Terminal Railway Company",
"BuyerName": "Philip Morris Incorporated",
"TotalAmount": 715,
"NeedsApproval": false,
"CustomerId": "cust_kwuqnj",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶qqbl0010.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | buyerName:
Expected: "Dr. J. Daniel Heck, Lorillard, Inc."
Actual: "Lorillard, Inc."
|
Task Input |
{"Name":"qqbl0010.png","Invoice":"https://storage.googleapis.com/llm-test-files/214dd80b164c7106b95ad1d981186fa42551b8aef395299a47b36772e971b81b"} |
System Output |
{"Name":"qqbl0010.png_KTB","BuyerName":"Lorillard, Inc.","CustomerId":"cust_cvpqaj","SenderName":"Hazleton Laboratories America, Inc.","TotalAmount":2975,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"$2,975.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Dr. J. Daniel Heck, Lorillard, Inc.
Actual: Lorillard, Inc.
|
?Total Correct? |
✅
Correct
Expected: 2975
Actual: 2975
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_cvpqaj
Actual: cust_cvpqaj
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Hazleton Laboratories America, Inc.
Actual: Hazleton Laboratories America, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/214dd80b164c7106b95ad1d981186fa42551b8aef395299a47b36772e971b81b) |
?Partial Extraction Match? |
buyerName:
Expected: "Dr. J. Daniel Heck, Lorillard, Inc."
Actual: "Lorillard, Inc."
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Hazleton Laboratories America, Inc.",
"BuyerName": "Dr. J. Daniel Heck, Lorillard, Inc.",
"TotalAmount": 2975,
"NeedsApproval": true,
"CustomerId": "cust_cvpqaj",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶qrnc0051.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"qrnc0051.png","Invoice":"https://storage.googleapis.com/llm-test-files/e552f9feb4fe5e5cfb6ddd494c4914d4ed46364b6779a8680eef0e65e698fdf1"} |
System Output |
{"Name":"qrnc0051.png_cY_","BuyerName":"Tobacco Institute","CustomerId":"cust_mbxjon","SenderName":"Composition Systems Incorporated","TotalAmount":2470.79,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[1677.29,224.40,53.25,22.75,39.10,454.00,2470.79]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Tobacco Institute
Actual: Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 2470.79
Actual: 2470.79
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_mbxjon
Actual: cust_mbxjon
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Composition Systems Incorporated
Actual: Composition Systems Incorporated
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/e552f9feb4fe5e5cfb6ddd494c4914d4ed46364b6779a8680eef0e65e698fdf1) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Composition Systems Incorporated",
"BuyerName": "Tobacco Institute",
"TotalAmount": 2470.79,
"NeedsApproval": true,
"CustomerId": "cust_mbxjon",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶qzwy0047.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | buyerName:
Expected: "The Tobacco Institute"
Actual: "The Tobacco Institute Inc"
senderName:
Expected: "Pitney Bowes"
Actual: "Pitney Bowes Inc"
|
Task Input |
{"Name":"qzwy0047.png","Invoice":"https://storage.googleapis.com/llm-test-files/1050836df5440ee7e5638ceeab7fdb68e5e5bcc2414b49bfb71457f60c0c7b57"} |
System Output |
{"Name":"qzwy0047.png_xHV","BuyerName":"The Tobacco Institute Inc","CustomerId":"cust_wywxbl","SenderName":"Pitney Bowes Inc","TotalAmount":98.74,"FilterResult":"Valid: Legible invoice; no tampering or malicious content detected.","ManagerEmail":"[email protected]","NeedsApproval":false,"BillingAmounts":"[98.74, 81.75, 12.00, 2.45, 0.90, 1.64, 4.99, 1135.00, 6.00, 98.74]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: The Tobacco Institute
Actual: The Tobacco Institute Inc
|
?Total Correct? |
✅
Correct
Expected: 98.74
Actual: 98.74
|
?Approval Correct? |
✅
Correct
Expected: false
Actual: false
|
?Customer ID Correct? |
✅
Correct
Expected: cust_wywxbl
Actual: cust_wywxbl
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Pitney Bowes
Actual: Pitney Bowes Inc
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/1050836df5440ee7e5638ceeab7fdb68e5e5bcc2414b49bfb71457f60c0c7b57) |
?Partial Extraction Match? |
buyerName:
Expected: "The Tobacco Institute"
Actual: "The Tobacco Institute Inc"
senderName:
Expected: "Pitney Bowes"
Actual: "Pitney Bowes Inc"
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Pitney Bowes",
"BuyerName": "The Tobacco Institute",
"TotalAmount": 98.74,
"NeedsApproval": false,
"CustomerId": "cust_wywxbl",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶rhld0148.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"rhld0148.png","Invoice":"https://storage.googleapis.com/llm-test-files/c5c089e6166013f320de4252aecb3b79192771ce9c920f80eab396aa80a332b0"} |
System Output |
{"Name":"rhld0148.png_WyX","BuyerName":"The Tobacco Institute","CustomerId":"cust_bvrtwu","SenderName":"Sparkman and Bartholomew Associates, Inc.","TotalAmount":20581.27,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"1706.25\",\"1830.00\",\"1275.00\",\"900.00\",\"4131.25\",\"8939.00\",\"440.76\",\"194.03\",\"19416.29\",\"1164.98\",\"20581.27\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: The Tobacco Institute
Actual: The Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 20581.27
Actual: 20581.27
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_bvrtwu
Actual: cust_bvrtwu
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Sparkman and Bartholomew Associates, Inc.
Actual: Sparkman and Bartholomew Associates, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/c5c089e6166013f320de4252aecb3b79192771ce9c920f80eab396aa80a332b0) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Sparkman and Bartholomew Associates, Inc.",
"BuyerName": "The Tobacco Institute",
"TotalAmount": 20581.27,
"NeedsApproval": true,
"CustomerId": "cust_bvrtwu",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶rnfb0105.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"rnfb0105.png","Invoice":"https://storage.googleapis.com/llm-test-files/86a35fcf9f66f6a3fb83b0825bce28aa2d42516d65848375055dde3439fc1ef3"} |
System Output |
{"Name":"rnfb0105.png_T9e","BuyerName":"Lorillard Company","CustomerId":"cust_ybwqzc","SenderName":"Metropolitan Sunday Newspapers, Inc.","TotalAmount":30676.93,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[14924.64,12685.94,17670.00,15019.50,3495.87,2971.49,36090.51,30676.93]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard Company
Actual: Lorillard Company
|
?Total Correct? |
✅
Correct
Expected: 30676.93
Actual: 30676.93
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_ybwqzc
Actual: cust_ybwqzc
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Metropolitan Sunday Newspapers, Inc.
Actual: Metropolitan Sunday Newspapers, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/86a35fcf9f66f6a3fb83b0825bce28aa2d42516d65848375055dde3439fc1ef3) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Metropolitan Sunday Newspapers, Inc.",
"BuyerName": "Lorillard Company",
"TotalAmount": 30676.93,
"NeedsApproval": true,
"CustomerId": "cust_ybwqzc",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶rpbd0105.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"rpbd0105.png","Invoice":"https://storage.googleapis.com/llm-test-files/bfc06f52bb86f0e221bc501ad23a2981e7639bb4014f8df5d10ef5e48869d458"} |
System Output |
{"Name":"rpbd0105.png_R69","BuyerName":"Lorillard Company","CustomerId":"cust_ybwqzc","SenderName":"Metropolitan Sunday Newspapers, Inc.","TotalAmount":23078.35,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"3441.00\",\"2924.85\",\"23710.00\",\"20153.50\",\"27151.00\",\"23078.35\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard Company
Actual: Lorillard Company
|
?Total Correct? |
✅
Correct
Expected: 23078.35
Actual: 23078.35
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_ybwqzc
Actual: cust_ybwqzc
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Metropolitan Sunday Newspapers, Inc.
Actual: Metropolitan Sunday Newspapers, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/bfc06f52bb86f0e221bc501ad23a2981e7639bb4014f8df5d10ef5e48869d458) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Metropolitan Sunday Newspapers, Inc.",
"BuyerName": "Lorillard Company",
"TotalAmount": 23078.35,
"NeedsApproval": true,
"CustomerId": "cust_ybwqzc",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶rqkh0042.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"rqkh0042.png","Invoice":"https://storage.googleapis.com/llm-test-files/87396abea0646ccab388af229a618d62f14a5cad4ca9070edc581160f0c357c6"} |
System Output |
{"Name":"rqkh0042.png_3IG","BuyerName":"The Tobacco Institute","CustomerId":"cust_dyhvvy","SenderName":"Airborne Express","TotalAmount":44.4,"FilterResult":"Valid - clear, legible, no tampering or malicious content observed","ManagerEmail":"[email protected]","NeedsApproval":false,"BillingAmounts":"[44.40, 8.50, 0.38, 8.88, 8.50, 0.38, 8.88]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: The Tobacco Institute
Actual: The Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 44.4
Actual: 44.4
|
?Approval Correct? |
✅
Correct
Expected: false
Actual: false
|
?Customer ID Correct? |
✅
Correct
Expected: cust_dyhvvy
Actual: cust_dyhvvy
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: AiRBORNE EXPRESS
Actual: Airborne Express
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/87396abea0646ccab388af229a618d62f14a5cad4ca9070edc581160f0c357c6) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "AiRBORNE EXPRESS",
"BuyerName": "The Tobacco Institute",
"TotalAmount": 44.4,
"NeedsApproval": false,
"CustomerId": "cust_dyhvvy",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶rtnb0108.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | buyerName:
Expected: "Dr. J. D. Heck, Lorillard Research Center"
Actual: "Lorillard Research Center"
|
Task Input |
{"Name":"rtnb0108.png","Invoice":"https://storage.googleapis.com/llm-test-files/958b91875e55341e8c5fcf7a393edd29f3d7841f4d804d6ed55a4bf63c561c9b"} |
System Output |
{"Name":"rtnb0108.png_Rr5","BuyerName":"Lorillard Research Center","CustomerId":"cust_cvpqaj","SenderName":"Hazleton Laboratories America, Inc.","TotalAmount":1300,"FilterResult":"Valid — clear, legible, no tampering detected; safe to proceed.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"$1,300.00\",\"1,300.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Dr. J. D. Heck, Lorillard Research Center
Actual: Lorillard Research Center
|
?Total Correct? |
✅
Correct
Expected: 1300
Actual: 1300
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_cvpqaj
Actual: cust_cvpqaj
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Hazleton Laboratories America, Inc.
Actual: Hazleton Laboratories America, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/958b91875e55341e8c5fcf7a393edd29f3d7841f4d804d6ed55a4bf63c561c9b) |
?Partial Extraction Match? |
buyerName:
Expected: "Dr. J. D. Heck, Lorillard Research Center"
Actual: "Lorillard Research Center"
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Hazleton Laboratories America, Inc.",
"BuyerName": "Dr. J. D. Heck, Lorillard Research Center",
"TotalAmount": 1300,
"NeedsApproval": true,
"CustomerId": "cust_cvpqaj",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶rxbw0181.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"rxbw0181.png","Invoice":"https://storage.googleapis.com/llm-test-files/e33fb4efe7079211211b8d0963bd4bb3fad16be9633bb99e52e3d879e8adad68"} |
System Output |
{"Name":"rxbw0181.png_Bo3","BuyerName":"Lorillard Tobacco Company","CustomerId":"cust_uzmkjy","SenderName":"Management Science Associates, Inc.","TotalAmount":838,"FilterResult":"Valid: clear, legible, no tampering or malicious content detected.","ManagerEmail":"[email protected]","NeedsApproval":false,"BillingAmounts":"[\"$838.00\",\"$838.00\",\"$838.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard Tobacco Company
Actual: Lorillard Tobacco Company
|
?Total Correct? |
✅
Correct
Expected: 838
Actual: 838
|
?Approval Correct? |
✅
Correct
Expected: false
Actual: false
|
?Customer ID Correct? |
✅
Correct
Expected: cust_uzmkjy
Actual: cust_uzmkjy
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Management Science Associates, Inc.
Actual: Management Science Associates, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/e33fb4efe7079211211b8d0963bd4bb3fad16be9633bb99e52e3d879e8adad68) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Management Science Associates, Inc.",
"BuyerName": "Lorillard Tobacco Company",
"TotalAmount": 838,
"NeedsApproval": false,
"CustomerId": "cust_uzmkjy",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶sgbx0063.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | senderName:
Expected: "FAIRCHILD PUBLICATIONS"
Actual: "Fairchild Publications, Division of Capital Cities Media Inc."
|
Task Input |
{"Name":"sgbx0063.png","Invoice":"https://storage.googleapis.com/llm-test-files/26186d6193a0e7cb10b160539764d2a587923b5e6cae6e64ee0a6e64607ac6c1"} |
System Output |
{"Name":"sgbx0063.png_GMK","BuyerName":"Lorillard","CustomerId":"cust_efstwl","SenderName":"Fairchild Publications, Division of Capital Cities Media Inc.","TotalAmount":6568.8,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[7728.00, 6568.80, 42.00]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: LORILLARD
Actual: Lorillard
|
?Total Correct? |
✅
Correct
Expected: 6568.8
Actual: 6568.8
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_efstwl
Actual: cust_efstwl
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: FAIRCHILD PUBLICATIONS
Actual: Fairchild Publications, Division of Capital Cities Media Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/26186d6193a0e7cb10b160539764d2a587923b5e6cae6e64ee0a6e64607ac6c1) |
?Partial Extraction Match? |
senderName:
Expected: "FAIRCHILD PUBLICATIONS"
Actual: "Fairchild Publications, Division of Capital Cities Media Inc."
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "FAIRCHILD PUBLICATIONS",
"BuyerName": "LORILLARD",
"TotalAmount": 6568.8,
"NeedsApproval": true,
"CustomerId": "cust_efstwl",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶snjp0107.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"snjp0107.png","Invoice":"https://storage.googleapis.com/llm-test-files/e66c2827f53378f724eca93bc90b5d903af9b4ae2210a07742357b58d9e3bc47"} |
System Output |
{"Name":"snjp0107.png_UjL","BuyerName":"Lorillard Media Services","CustomerId":"cust_jgjkzc","SenderName":"Home Viewer Publications, Inc.","TotalAmount":16995.75,"FilterResult":"Valid - invoice appears legitimate, legible, and safe","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"19995.00\",\"0.00\",\"0.00\",\"19995.00\",\"2999.25\",\"16995.75\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard Media Services
Actual: Lorillard Media Services
|
?Total Correct? |
✅
Correct
Expected: 16995.75
Actual: 16995.75
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_jgjkzc
Actual: cust_jgjkzc
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Home Viewer Publications, Inc.
Actual: Home Viewer Publications, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/e66c2827f53378f724eca93bc90b5d903af9b4ae2210a07742357b58d9e3bc47) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Home Viewer Publications, Inc.",
"BuyerName": "Lorillard Media Services",
"TotalAmount": 16995.75,
"NeedsApproval": true,
"CustomerId": "cust_jgjkzc",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶srfw0051.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"srfw0051.png","Invoice":"https://storage.googleapis.com/llm-test-files/5c6cd284a934b058c6afe36663e00b592e4b1d0fddbd89412c5bb9c94f90bb92"} |
System Output |
{"Name":"srfw0051.png_5G_","BuyerName":"The Tobacco Institute","CustomerId":"cust_rnbufp","SenderName":"Peake Printers, Inc.","TotalAmount":6812.62,"FilterResult":"Valid invoice: legible, authentic, no tampering or malicious content.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"6427.000\",\"6427.00\",\"6427.00\",\"385.62\",\"6812.62\",\"6812.62\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: The Tobacco Institute
Actual: The Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 6812.62
Actual: 6812.62
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_rnbufp
Actual: cust_rnbufp
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Peake Printers, Inc.
Actual: Peake Printers, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/5c6cd284a934b058c6afe36663e00b592e4b1d0fddbd89412c5bb9c94f90bb92) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Peake Printers, Inc.",
"BuyerName": "The Tobacco Institute",
"TotalAmount": 6812.62,
"NeedsApproval": true,
"CustomerId": "cust_rnbufp",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶symf0145.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"symf0145.png","Invoice":"https://storage.googleapis.com/llm-test-files/7dee60ac8cd3c461050ed95dda1dbb59078d2feab280e2f47a734dd0b43b9ad1"} |
System Output |
{"Name":"symf0145.png_Hgg","BuyerName":"The Tobacco Institute","CustomerId":"cust_rnbufp","SenderName":"Peake Printers, Inc.","TotalAmount":5070,"FilterResult":"Valid: clear, legible, authentic; no tampering or malicious instructions detected.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"5070.000\",\"5070.00\",\"5070.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: The Tobacco Institute
Actual: The Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 5070
Actual: 5070
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_rnbufp
Actual: cust_rnbufp
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Peake Printers, Inc.
Actual: Peake Printers, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/7dee60ac8cd3c461050ed95dda1dbb59078d2feab280e2f47a734dd0b43b9ad1) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Peake Printers, Inc.",
"BuyerName": "The Tobacco Institute",
"TotalAmount": 5070,
"NeedsApproval": true,
"CustomerId": "cust_rnbufp",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶thvd0030.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"thvd0030.png","Invoice":"https://storage.googleapis.com/llm-test-files/019b303b796b1d7a5081b83cb9d856761eeaf1acbbb23571f2299df95179b8e6"} |
System Output |
{"Name":"thvd0030.png__X_","BuyerName":"The Tobacco Institute","CustomerId":"cust_zwcend","SenderName":"Beaver Press, Inc.","TotalAmount":494.53,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[283.55, 17.01, 193.97, 494.53]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: The Tobacco Institute
Actual: The Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 494.53
Actual: 494.53
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_zwcend
Actual: cust_zwcend
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Beaver Press, Inc.
Actual: Beaver Press, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/019b303b796b1d7a5081b83cb9d856761eeaf1acbbb23571f2299df95179b8e6) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Beaver Press, Inc.",
"BuyerName": "The Tobacco Institute",
"TotalAmount": 494.53,
"NeedsApproval": true,
"CustomerId": "cust_zwcend",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶tjwn0019.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"tjwn0019.png","Invoice":"https://storage.googleapis.com/llm-test-files/fb43ac563080be21fc8a1588d7735fdd70cead5682214940280dc6b6cef392c7"} |
System Output |
{"Name":"tjwn0019.png_J0f","BuyerName":"PHILIP MORRIS U.S.A.","CustomerId":"cust_iosnug","SenderName":"CYRK, INC.","TotalAmount":67798.32,"FilterResult":"Valid - clear, legible invoice; no tampering or malicious content detected.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"13,258.320\",\"13,258.32\",\"54,540.000\",\"54,540.00\",\"67,798.32\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: PHILIP MORRIS U.S.A.
Actual: PHILIP MORRIS U.S.A.
|
?Total Correct? |
✅
Correct
Expected: 67798.32
Actual: 67798.32
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_iosnug
Actual: cust_iosnug
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: CYRK, INC.
Actual: CYRK, INC.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/fb43ac563080be21fc8a1588d7735fdd70cead5682214940280dc6b6cef392c7) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "CYRK, INC.",
"BuyerName": "PHILIP MORRIS U.S.A.",
"TotalAmount": 67798.32,
"NeedsApproval": true,
"CustomerId": "cust_iosnug",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶tkff0071.png |
valid | ✅ | ❌ | ✅ | ✅ | 0.83 | |
Task Input |
{"Name":"tkff0071.png","Invoice":"https://storage.googleapis.com/llm-test-files/3f97a327d471538264d7a5c83be3754e3b5107bf6c670ad0ec744895eda46cb7"} |
System Output |
{"Name":"tkff0071.png_jFh","BuyerName":"Lorillard","CustomerId":"cust_tlzyjc","SenderName":"The McCall Publishing Co.","TotalAmount":47381.04,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[71100.00,56880.00,8532.00,48348.00,966.96,47381.04,8883.15]"} |
Explanation |
TotalAmount differs: expected 48348 vs actual 47381.04.
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: LORILLARD
Actual: Lorillard
|
?Total Correct? |
❌
Incorrect
Expected: 48348
Actual: 47381.04
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_tlzyjc
Actual: cust_tlzyjc
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: The McCall Publishing Co.
Actual: The McCall Publishing Co.
|
?Input validation correct? |
✅ |
?Extractions correct? |
❌ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/3f97a327d471538264d7a5c83be3754e3b5107bf6c670ad0ec744895eda46cb7) |
?Partial Extraction Match? |
|
?Weighted score |
0.83 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "The McCall Publishing Co.",
"BuyerName": "LORILLARD",
"TotalAmount": 48348,
"NeedsApproval": true,
"CustomerId": "cust_tlzyjc",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶tnfm0044.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"tnfm0044.png","Invoice":"https://storage.googleapis.com/llm-test-files/96b9c9b10a6c0721d7c6c2693aa7db9a51a937fe8a2ec55d536450f796bedc75"} |
System Output |
{"Name":"tnfm0044.png_clD","BuyerName":"The Tobacco Institute","CustomerId":"cust_dludiq","SenderName":"Sparkman and Bartholomew Associates Inc","TotalAmount":539.49,"FilterResult":"Valid - legible, authentic-looking, no tampering/malicious content detected","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"$508.95\",\"30.54\",\"$539.49\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: The Tobacco Institute
Actual: The Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 539.49
Actual: 539.49
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_dludiq
Actual: cust_dludiq
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Sparkman and Bartholomew Associates Inc
Actual: Sparkman and Bartholomew Associates Inc
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/96b9c9b10a6c0721d7c6c2693aa7db9a51a937fe8a2ec55d536450f796bedc75) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Sparkman and Bartholomew Associates Inc",
"BuyerName": "The Tobacco Institute",
"TotalAmount": 539.49,
"NeedsApproval": true,
"CustomerId": "cust_dludiq",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶tphh0144.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"tphh0144.png","Invoice":"https://storage.googleapis.com/llm-test-files/e4671ab60d3c3719eb6090548e5d8160018fcb3926095e2893db18fb7d1fa035"} |
System Output |
{"Name":"tphh0144.png_D4q","BuyerName":"American Tobacco Company","CustomerId":"cust_reafqi","SenderName":"The Rowland Company, Inc.","TotalAmount":3986.47,"FilterResult":"Valid: Legible invoice scan; no tampering or malicious instructions detected.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"$3,796.05\",\"$190.42\",\"$3,986.47\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: American Tobacco Company
Actual: American Tobacco Company
|
?Total Correct? |
✅
Correct
Expected: 3986.47
Actual: 3986.47
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_reafqi
Actual: cust_reafqi
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: The Rowland Company, Inc.
Actual: The Rowland Company, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/e4671ab60d3c3719eb6090548e5d8160018fcb3926095e2893db18fb7d1fa035) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "The Rowland Company, Inc.",
"BuyerName": "American Tobacco Company",
"TotalAmount": 3986.47,
"NeedsApproval": true,
"CustomerId": "cust_reafqi",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶tydw0051.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"tydw0051.png","Invoice":"https://storage.googleapis.com/llm-test-files/e864a8b66b451b74e272e4bb1b575edeb429a7494fabee5b66a221a0cd389738"} |
System Output |
{"Name":"tydw0051.png_NaR","BuyerName":"The Tobacco Institute","CustomerId":"cust_rnbufp","SenderName":"Peake Printers, Inc.","TotalAmount":203.52,"FilterResult":"Valid: clear, legible invoice; no tampering or malicious content detected.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[203.52, 192.00, 11.52]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: The Tobacco Institute
Actual: The Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 203.52
Actual: 203.52
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_rnbufp
Actual: cust_rnbufp
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Peake Printers, Inc.
Actual: Peake Printers, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/e864a8b66b451b74e272e4bb1b575edeb429a7494fabee5b66a221a0cd389738) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Peake Printers, Inc.",
"BuyerName": "The Tobacco Institute",
"TotalAmount": 203.52,
"NeedsApproval": true,
"CustomerId": "cust_rnbufp",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶xkfp0094.png |
valid | ✅ | ❌ | ✅ | ✅ | 0.83 | |
Task Input |
{"Name":"xkfp0094.png","Invoice":"https://storage.googleapis.com/llm-test-files/910194ccb2ddc83f814044dffbdb54d7cc5aa67fe12c1e9cfd7468f614b1f10a"} |
System Output |
{"Name":"xkfp0094.png_73q","BuyerName":"Lorillard Tobacco Company Inc.","CustomerId":"cust_ljmyto","SenderName":"Bozell Worldwide, Inc.","TotalAmount":91343.91,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"91,343.91\",\"0.00\",\"0.00\",\"91,343.91\",\"91,343.91\",\"379,065.14\",\"179,654.50\",\"39,806.70\",\"159,694.94\"]"} |
Explanation |
TotalAmount mismatch: expected 91943.91; actual 91343.91.
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard Tobacco Company Inc.
Actual: Lorillard Tobacco Company Inc.
|
?Total Correct? |
❌
Incorrect
Expected: 91943.91
Actual: 91343.91
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_ljmyto
Actual: cust_ljmyto
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Bozell Worldwide, Inc.
Actual: Bozell Worldwide, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
❌ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/910194ccb2ddc83f814044dffbdb54d7cc5aa67fe12c1e9cfd7468f614b1f10a) |
?Partial Extraction Match? |
|
?Weighted score |
0.83 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Bozell Worldwide, Inc.",
"BuyerName": "Lorillard Tobacco Company Inc.",
"TotalAmount": 91943.91,
"NeedsApproval": true,
"CustomerId": "cust_ljmyto",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶xmjb0108.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"xmjb0108.png","Invoice":"https://storage.googleapis.com/llm-test-files/526a40767cf43ce0e0c90d5ad7583042fca0da63f3353a35d15603bda8b07489"} |
System Output |
{"Name":"xmjb0108.png_bZh","BuyerName":"Lorillard Research Center","CustomerId":"cust_dnwalc","SenderName":"Microbiological Associates Inc.","TotalAmount":3825.5,"FilterResult":"Valid – no signs of tampering/illegible/malicious content","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"$3,825.50\",\"3,825.50\",\"3,825.50\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Lorillard Research Center
Actual: Lorillard Research Center
|
?Total Correct? |
✅
Correct
Expected: 3825.5
Actual: 3825.5
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_dnwalc
Actual: cust_dnwalc
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Microbiological Associates Inc.
Actual: Microbiological Associates Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/526a40767cf43ce0e0c90d5ad7583042fca0da63f3353a35d15603bda8b07489) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Microbiological Associates Inc.",
"BuyerName": "Lorillard Research Center",
"TotalAmount": 3825.5,
"NeedsApproval": true,
"CustomerId": "cust_dnwalc",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶xnnj0030.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | buyerName:
Expected: "Vincent J. Weiner, Philip Morris U.S.A."
Actual: "Philip Morris U.S.A."
senderName:
Expected: "Atlanta Braves"
Actual: "The Atlanta Braves"
|
Task Input |
{"Name":"xnnj0030.png","Invoice":"https://storage.googleapis.com/llm-test-files/c17d186d96b6ae9f55bd80c252d0c617b23dbb2363c7ad7e873acb225222d773"} |
System Output |
{"Name":"xnnj0030.png_d9-","BuyerName":"Philip Morris U.S.A.","CustomerId":"cust_vlbzaw","SenderName":"The Atlanta Braves","TotalAmount":15725,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"18500.00\",\"15725.00\",\"15785.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Vincent J. Weiner, Philip Morris U.S.A.
Actual: Philip Morris U.S.A.
|
?Total Correct? |
✅
Correct
Expected: 15725
Actual: 15725
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_vlbzaw
Actual: cust_vlbzaw
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Atlanta Braves
Actual: The Atlanta Braves
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/c17d186d96b6ae9f55bd80c252d0c617b23dbb2363c7ad7e873acb225222d773) |
?Partial Extraction Match? |
buyerName:
Expected: "Vincent J. Weiner, Philip Morris U.S.A."
Actual: "Philip Morris U.S.A."
senderName:
Expected: "Atlanta Braves"
Actual: "The Atlanta Braves"
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Atlanta Braves",
"BuyerName": "Vincent J. Weiner, Philip Morris U.S.A.",
"TotalAmount": 15725,
"NeedsApproval": true,
"CustomerId": "cust_vlbzaw",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶xpnx0025.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | buyerName:
Expected: "Phillip Morris"
Actual: "Philip Morris"
|
Task Input |
{"Name":"xpnx0025.png","Invoice":"https://storage.googleapis.com/llm-test-files/849e229f681bff72d70e6d14d1b0c581ed5d40e00bbe8fc98684c45323819c86"} |
System Output |
{"Name":"xpnx0025.png_AEd","BuyerName":"Philip Morris","CustomerId":"cust_rdwbrr","SenderName":"Metromedia Technologies, Inc.","TotalAmount":225.22,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[{\"label\":\"Sales Total\",\"amount\":0.00},{\"label\":\"Trade Disc\",\"amount\":0.00},{\"label\":\"Freight\",\"amount\":225.22},{\"label\":\"Misc Charge\",\"amount\":0.00},{\"label\":\"Tax Total\",\"amount\":0.00},{\"label\":\"TOTAL\",\"amount\":225.22}]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Phillip Morris
Actual: Philip Morris
|
?Total Correct? |
✅
Correct
Expected: 225.22
Actual: 225.22
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_rdwbrr
Actual: cust_rdwbrr
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Metromedia Technologies, Inc.
Actual: Metromedia Technologies, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/849e229f681bff72d70e6d14d1b0c581ed5d40e00bbe8fc98684c45323819c86) |
?Partial Extraction Match? |
buyerName:
Expected: "Phillip Morris"
Actual: "Philip Morris"
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Metromedia Technologies, Inc.",
"BuyerName": "Phillip Morris",
"TotalAmount": 225.22,
"NeedsApproval": true,
"CustomerId": "cust_rdwbrr",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶yjhd0006.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"yjhd0006.png","Invoice":"https://storage.googleapis.com/llm-test-files/f2a5bdb1deaa77f0e7d3911ed6dca021f1b4f89ba7d258c7e193990aedd87e29"} |
System Output |
{"Name":"yjhd0006.png_izi","BuyerName":"Weber Shandwick","CustomerId":"cust_njoyxv","SenderName":"Doot-Russell, Inc.","TotalAmount":480,"FilterResult":"Valid invoice. Legible, no signs of tampering or malicious content. Proceed.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"480.00\",\"480.00\",\"$480.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Weber Shandwick
Actual: Weber Shandwick
|
?Total Correct? |
✅
Correct
Expected: 480
Actual: 480
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_njoyxv
Actual: cust_njoyxv
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: DOOT-RUSSELL, INC.
Actual: Doot-Russell, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/f2a5bdb1deaa77f0e7d3911ed6dca021f1b4f89ba7d258c7e193990aedd87e29) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "DOOT-RUSSELL, INC.",
"BuyerName": "Weber Shandwick",
"TotalAmount": 480,
"NeedsApproval": true,
"CustomerId": "cust_njoyxv",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶ylfh0144.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"ylfh0144.png","Invoice":"https://storage.googleapis.com/llm-test-files/d9c7f923cdf6ff48f1ac8086ab8affe4d1c5350079194189d4ada2947f91c5ab"} |
System Output |
{"Name":"ylfh0144.png_EJg","BuyerName":"American Tobacco Company","CustomerId":"cust_reafqi","SenderName":"The Rowland Company, Inc.","TotalAmount":6000,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"$6,000.00\",\"$6,000.00\",\"$6,000.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: American Tobacco Company
Actual: American Tobacco Company
|
?Total Correct? |
✅
Correct
Expected: 6000
Actual: 6000
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_reafqi
Actual: cust_reafqi
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: The Rowland Company, Inc.
Actual: The Rowland Company, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/d9c7f923cdf6ff48f1ac8086ab8affe4d1c5350079194189d4ada2947f91c5ab) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "The Rowland Company, Inc.",
"BuyerName": "American Tobacco Company",
"TotalAmount": 6000,
"NeedsApproval": true,
"CustomerId": "cust_reafqi",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶ymlb0132.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"ymlb0132.png","Invoice":"https://storage.googleapis.com/llm-test-files/018a57610cb0b3520cdc353ccd5f4146003dda9652281b777778ac03855ef771"} |
System Output |
{"Name":"ymlb0132.png_lCs","BuyerName":"BATUS Inc","CustomerId":"cust_ffifzq","SenderName":"Johnson & Higgins","TotalAmount":14037.3,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"15,597.00CR\",\"1,559.70\",\"14,037.30CR\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: BATUS INC
Actual: BATUS Inc
|
?Total Correct? |
✅
Correct
Expected: 14037.3
Actual: 14037.3
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_ffifzq
Actual: cust_ffifzq
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Johnson & Higgins
Actual: Johnson & Higgins
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/018a57610cb0b3520cdc353ccd5f4146003dda9652281b777778ac03855ef771) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Johnson & Higgins",
"BuyerName": "BATUS INC",
"TotalAmount": 14037.3,
"NeedsApproval": true,
"CustomerId": "cust_ffifzq",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶zhvg0007.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"zhvg0007.png","Invoice":"https://storage.googleapis.com/llm-test-files/85688dc4688513b4b2df981d49fca0a9aef13acdd981ff66423d6f5045f51de1"} |
System Output |
{"Name":"zhvg0007.png_ZFY","BuyerName":"Philip Morris Incorporated","CustomerId":"cust_ujwjmw","SenderName":"Gary M. Reynolds & Associates, Inc.","TotalAmount":81333,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[\"$81,333.00\",\"$81,333.00\"]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Philip Morris Incorporated
Actual: Philip Morris Incorporated
|
?Total Correct? |
✅
Correct
Expected: 81333
Actual: 81333
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_ujwjmw
Actual: cust_ujwjmw
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Gary M. Reynolds & Associates, Inc.
Actual: Gary M. Reynolds & Associates, Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/85688dc4688513b4b2df981d49fca0a9aef13acdd981ff66423d6f5045f51de1) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Gary M. Reynolds & Associates, Inc.",
"BuyerName": "Philip Morris Incorporated",
"TotalAmount": 81333,
"NeedsApproval": true,
"CustomerId": "cust_ujwjmw",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶zmxf0112.png |
valid | ✅ | ✅ | ✅ | ✅ | 0.99 | senderName:
Expected: "CRC Contract Research Center"
Actual: "CRC Contract Research Center B.V.B.A./S.P.R.L."
|
Task Input |
{"Name":"zmxf0112.png","Invoice":"https://storage.googleapis.com/llm-test-files/8f00d8dde0d73ef162a2d533b672f7fbb901407b66f5190c48ae88db396bdf74"} |
System Output |
{"Name":"zmxf0112.png_X8T","BuyerName":"INBIFO Institut für biologische Forschung","CustomerId":"cust_dibqxu","SenderName":"CRC Contract Research Center B.V.B.A./S.P.R.L.","TotalAmount":8554,"FilterResult":"Valid","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[{\"text\":\"8.554,-\",\"currency\":\"BEC\",\"context\":\"line item price\"},{\"text\":\"8.554,-\",\"currency\":\"BEC\",\"context\":\"TOTAL\"},{\"text\":\"413,84\",\"currency\":\"DEM\",\"context\":\"TOTAL (converted)\"}]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: INBIFO Institut für biologische Forschung
Actual: INBIFO Institut für biologische Forschung
|
?Total Correct? |
✅
Correct
Expected: 8554
Actual: 8554
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_dibqxu
Actual: cust_dibqxu
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: CRC Contract Research Center
Actual: CRC Contract Research Center B.V.B.A./S.P.R.L.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/8f00d8dde0d73ef162a2d533b672f7fbb901407b66f5190c48ae88db396bdf74) |
?Partial Extraction Match? |
senderName:
Expected: "CRC Contract Research Center"
Actual: "CRC Contract Research Center B.V.B.A./S.P.R.L."
|
?Weighted score |
0.99 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "CRC Contract Research Center",
"BuyerName": "INBIFO Institut für biologische Forschung",
"TotalAmount": 8554,
"NeedsApproval": true,
"CustomerId": "cust_dibqxu",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶zpgl0036.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"zpgl0036.png","Invoice":"https://storage.googleapis.com/llm-test-files/263d4af4973bb218a9dfee1cff6a7e82f7686d1d6ec99782ac82f6fc6b165d9d"} |
System Output |
{"Name":"zpgl0036.png_WHe","BuyerName":"The Tobacco Institute","CustomerId":"cust_orotdd","SenderName":"RealCom Office Communications Inc","TotalAmount":1284.8,"FilterResult":"Valid: clear, legible invoice; no tampering or malicious content detected.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[1267.18,654.52,612.66,311,265.2,35,60.94,672.14,1284.8]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: The Tobacco Institute
Actual: The Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 1284.8
Actual: 1284.8
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_orotdd
Actual: cust_orotdd
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: RealCom Office Communications Inc
Actual: RealCom Office Communications Inc
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/263d4af4973bb218a9dfee1cff6a7e82f7686d1d6ec99782ac82f6fc6b165d9d) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "RealCom Office Communications Inc",
"BuyerName": "The Tobacco Institute",
"TotalAmount": 1284.8,
"NeedsApproval": true,
"CustomerId": "cust_orotdd",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|
▶zxbh0038.png |
valid | ✅ | ✅ | ✅ | ✅ | 1.00 | |
Task Input |
{"Name":"zxbh0038.png","Invoice":"https://storage.googleapis.com/llm-test-files/afe25e26035df93284b3fa8aa21a8f07fbe0ee78ffa3d365890c5d4810c65d78"} |
System Output |
{"Name":"zxbh0038.png_7MM","BuyerName":"Tobacco Institute","CustomerId":"cust_basfwy","SenderName":"Fannon-Luers Associates Inc.","TotalAmount":102,"FilterResult":"Valid: legible, untampered invoice with no malicious content.","ManagerEmail":"[email protected]","NeedsApproval":true,"BillingAmounts":"[102.00, 0.00, 102.00]"} |
Explanation |
No explanation available
|
?Valid? |
valid |
?Buyer Correct? |
✅
Correct
Expected: Tobacco Institute
Actual: Tobacco Institute
|
?Total Correct? |
✅
Correct
Expected: 102
Actual: 102
|
?Approval Correct? |
✅
Correct
Expected: true
Actual: true
|
?Customer ID Correct? |
✅
Correct
Expected: cust_basfwy
Actual: cust_basfwy
|
?Manager Email Correct? |
|
?Sender Name Correct? |
✅
Correct
Expected: Fannon-Luers Associates Inc.
Actual: Fannon-Luers Associates Inc.
|
?Input validation correct? |
✅ |
?Extractions correct? |
✅ |
?Lookup correct? |
✅ |
?image |
 _ Document Processing Benchmark_files/afe25e26035df93284b3fa8aa21a8f07fbe0ee78ffa3d365890c5d4810c65d78) |
?Partial Extraction Match? |
|
?Weighted score |
1.00 |
?Validation prediction |
valid |
?expectedOutput |
{
"SenderName": "Fannon-Luers Associates Inc.",
"BuyerName": "Tobacco Institute",
"TotalAmount": 102,
"NeedsApproval": true,
"CustomerId": "cust_basfwy",
"ManagerEmail": "[email protected]"
} |
Trace |
Trace |
Status |
evalSuccess |
|